v0.2.55..v0.2.56 changeset AddressParser.cpp
Garret Voltz edited this page Aug 14, 2020
·
3 revisions
diff --git a/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp b/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp
index 75bf914..ea26f4f 100644
--- a/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp
+++ b/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp
@@ -33,6 +33,7 @@
#include <hoot/core/algorithms/string/ExactStringDistance.h>
#include <hoot/core/elements/OsmMap.h>
#include <hoot/core/conflate/address/Address.h>
+#include <hoot/core/util/StringUtils.h>
// libpostal
#include <libpostal/libpostal.h>
@@ -42,7 +43,10 @@ namespace hoot
AddressParser::AddressParser() :
_allowLenientHouseNumberMatching(true),
-_preTranslateTagValuesToEnglish(false)
+_preTranslateTagValuesToEnglish(false),
+_parsedFromAddressTag(true), // reset to true at the start of parseAddresses; set to false when the address was found via alt tags
+_isHouseNumRange(false), // set to true when a house number is parsed as a range in _parseAddressFromComponents
+_isSubLetter(false) // set to true when _isSubLetterAddress accepts the house number
{
}
@@ -107,9 +111,7 @@ int AddressParser::numAddressesRecursive(const ConstElementPtr& element, const O
else if (element->getElementType() == ElementType::Way)
{
addresses = parseAddressesFromWayNodes(*std::dynamic_pointer_cast<const Way>(element), map);
- // TODO: Uncommenting this blows up a few of the poi/poly regression tests by forcing some
- // address comparisons. See #3627.
- //addresses.append(parseAddresses(*element));
+ addresses.append(parseAddresses(*element));
}
else if (element->getElementType() == ElementType::Relation)
{
@@ -127,6 +129,10 @@ int AddressParser::numAddressesRecursive(const ConstElementPtr& element, const O
QList<Address> AddressParser::parseAddresses(const Element& element,
const bool normalizeAddresses) const
{
+ _parsedFromAddressTag = true;
+ _isHouseNumRange = false;
+ _isSubLetter = false;
+
// Make this call here, so that we don't cause it to be done unnecessarily as part of this
// class's init when its a mem var on another class, since this init is expensive.
LibPostalInit::getInstance();
@@ -137,6 +143,7 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
QString houseNum;
QString street;
const QSet<QString> parsedAddresses = _parseAddresses(element, houseNum, street);
+ LOG_TRACE("Parsed " << parsedAddresses.size() << " addresses for " << element.getElementId());
LOG_VART(parsedAddresses);
// add the parsed addresses to a collection in which they will later be compared to each other
@@ -146,11 +153,11 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
QString parsedAddress = *parsedAddressItr;
LOG_VART(parsedAddress);
- //optional additional lang pre-normalization translation
+ // optional additional lang pre-normalization translation
if (_preTranslateTagValuesToEnglish)
{
- //only translating the street portion of the address string...the number isn't translatable
- assert(!street.isEmpty());
+ // only translating the street portion of the address string...the number isn't translatable
+ assert(!street.isEmpty());
const QString preTranslatedStreet = _addressTranslator.translateToEnglish(street);
if (!preTranslatedStreet.isEmpty())
{
@@ -165,7 +172,7 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
if (normalizeAddresses)
{
- //normalize and translate the address strings, so we end up comparing apples to apples
+ // normalize and translate the address strings, so we end up comparing apples to apples
const QSet<QString> normalizedAddresses = _addressNormalizer.normalizeAddress(parsedAddress);
LOG_VART(normalizedAddresses);
@@ -175,6 +182,9 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
const QString normalizedAddress = *normalizedAddressItr;
LOG_VART(normalizedAddress);
Address address(normalizedAddress, _allowLenientHouseNumberMatching);
+ address.setParsedFromAddressTag(_parsedFromAddressTag);
+ address.setIsRange(_isHouseNumRange);
+ address.setIsSubLetter(_isSubLetter);
if (!addresses.contains(address))
{
LOG_TRACE("Adding address: " << address << " for element: " << element.getElementId());
@@ -185,6 +195,9 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
else
{
Address address(parsedAddress, _allowLenientHouseNumberMatching);
+ address.setParsedFromAddressTag(_parsedFromAddressTag);
+ address.setIsRange(_isHouseNumRange);
+ address.setIsSubLetter(_isSubLetter);
if (!addresses.contains(address))
{
LOG_TRACE("Adding address: " << address << " for element: " << element.getElementId());
@@ -226,7 +239,7 @@ QList<Address> AddressParser::parseAddressesFromRelationMembers(const Relation&
ConstElementPtr member = map.getElement(relationMembers[i].getElementId());
// If the poly contains the poi being compared to as a way node or relation member, then the
// POI's address will be added to both the poly and poi group of addresses and yield a fake
- // address match
+ // address match.
if (skipElementId.isNull() || member->getElementId() != skipElementId)
{
if (member->getElementType() == ElementType::Node)
@@ -286,14 +299,20 @@ bool AddressParser::_isParseableAddressFromComponents(const Tags& tags, QString&
QString& street) const
{
// we only require a valid street address...no other higher order parts, like city, state, etc.
- houseNum = AddressTagKeys::getInstance().getAddressTagValue(tags, "house_number");
- street = AddressTagKeys::getInstance().getAddressTagValue(tags, "street").toLower();
+ houseNum = _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "house_number");
+ LOG_VART(houseNum);
+ street = _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "street").toLower();
+ LOG_VART(street);
if (!houseNum.isEmpty() && !street.isEmpty())
{
LOG_TRACE("Found address from components: " << houseNum << ", " << street << ".");
return true;
}
- return false;
+ else
+ {
+ LOG_TRACE("No parseable address present.");
+ return false;
+ }
}
bool AddressParser::_isRangeAddress(const QString& houseNum) const
@@ -301,14 +320,22 @@ bool AddressParser::_isRangeAddress(const QString& houseNum) const
return houseNum.contains("-");
}
-bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QString& street) const
+bool AddressParser::_isSubLetterAddress(const QString& houseNum) const // reports whether houseNum is treated as a "subletter" house number
+{
+ return StringUtils::hasAlphabeticCharacter(houseNum); // presumably true when any letter is present (e.g. "12a") - confirm against StringUtils
+}
+
+bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QString& street,
+ const bool requireStreetTypeInIntersection) const
{
+ LOG_VART(address);
+
// use libpostal to break down the address string
libpostal_address_parser_response_t* parsed =
libpostal_parse_address(
address.toUtf8().data(), libpostal_get_address_parser_default_options());
/*
- *Label: house_number, Component: 781
+ Label: house_number, Component: 781
Label: road, Component: franklin ave
Label: suburb, Component: crown heights
Label: city_district, Component: brooklyn
@@ -323,7 +350,7 @@ bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QStr
LOG_VART(label);
const QString component = QString::fromUtf8((const char*)parsed->components[i]);
LOG_VART(component);
- //we only care about the street address
+ // we only care about the street address
if (label == "house_number")
{
houseNum = component;
@@ -335,12 +362,35 @@ bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QStr
}
libpostal_address_parser_response_destroy(parsed);
- // intersections won't have numbers; unfortunately this lets through a false positive, like:
- // "lrv station-church street" - TODO: should this be re-enabled?
- if (/*!houseNum.isEmpty() &&*/ !street.isEmpty())
+ LOG_VART(street);
+ LOG_VART(houseNum);
+ LOG_VART(requireStreetTypeInIntersection);
+ LOG_VART(Address::isStreetIntersectionAddress(street, requireStreetTypeInIntersection));
+ LOG_VART(Address::isStreetIntersectionAddress(address, requireStreetTypeInIntersection));
+
+ // street address with house number
+ if (!houseNum.isEmpty() && !street.isEmpty())
{
address = houseNum + " " + street;
address = address.trimmed();
+ LOG_TRACE("Found address: " << address);
+ return true;
+ }
+ // intersections won't have numbers
+ else if (!street.isEmpty() &&
+ Address::isStreetIntersectionAddress(street, requireStreetTypeInIntersection))
+ {
+ address = street;
+ address = address.trimmed();
+ LOG_TRACE("Found intersection address: " << address);
+ return true;
+ }
+ // if libpostal didn't pull out a street, then let's see if the string passed in is an
+ // intersection
+ else if (Address::isStreetIntersectionAddress(address, requireStreetTypeInIntersection))
+ {
+ address = address.trimmed();
+ LOG_TRACE("Found intersection address: " << address);
return true;
}
else
@@ -388,19 +438,27 @@ QSet<QString> AddressParser::_parseAddressFromComponents(const Tags& tags, QStri
{
// use custom logic for a address containing a range of addresses
parsedAddresses = _parseAddressAsRange(houseNum, street);
+ _isHouseNumRange = true;
+ LOG_TRACE("Address is range address.");
}
else
{
+ if (_isSubLetterAddress(houseNum))
+ {
+ _isSubLetter = true;
+ LOG_TRACE("Address is subletter address.");
+ }
+
QString parsedAddress = houseNum + " ";
const QString streetPrefix =
- AddressTagKeys::getInstance().getAddressTagValue(tags, "street_prefix");
+ _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "street_prefix");
if (!streetPrefix.isEmpty())
{
parsedAddress += streetPrefix + " ";
}
parsedAddress += street;
const QString streetSuffix =
- AddressTagKeys::getInstance().getAddressTagValue(tags, "street_suffix");
+ _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "street_suffix");
if (!streetSuffix.isEmpty())
{
parsedAddress += " " + streetPrefix;
@@ -420,10 +478,9 @@ QString AddressParser::_parseAddressFromAltTags(const Tags& tags, QString& house
LOG_TRACE("Parsing address from alt tags...");
QString parsedAddress;
- //let's always look in the name field; arguably, we could look in all of them instead of just
- //one...
- QSet<QString> additionalTagKeys = AddressTagKeys::getInstance().getAdditionalTagKeys();
- additionalTagKeys = additionalTagKeys.unite(QSet<QString>::fromList(tags.getNameKeys()));
+ // let's always look in the name field; arguably, we could look in all of them instead of just
+ // one...
+ QSet<QString> additionalTagKeys = _addressNormalizer.getAddressTagKeys()->getAdditionalTagKeys();
LOG_VART(additionalTagKeys);
for (QSet<QString>::const_iterator tagItr = additionalTagKeys.begin();
@@ -439,6 +496,21 @@ QString AddressParser::_parseAddressFromAltTags(const Tags& tags, QString& house
}
}
+ additionalTagKeys = QSet<QString>::fromList(tags.getNameKeys());
+ LOG_VART(additionalTagKeys);
+ for (QSet<QString>::const_iterator tagItr = additionalTagKeys.begin();
+ tagItr != additionalTagKeys.end(); ++tagItr)
+ {
+ const QString tagKey = *tagItr;
+ QString tagVal = tags.get(tagKey);
+ if (!tagVal.isEmpty() && _isValidAddressStr(tagVal, houseNum, street, true))
+ {
+ parsedAddress = tagVal;
+ LOG_TRACE("Found address: " << parsedAddress << " from additional tag key: " << tagKey << ".");
+ break;
+ }
+ }
+
return parsedAddress;
}
@@ -450,11 +522,11 @@ QSet<QString> AddressParser::_parseAddresses(const Element& element, QString& ho
// look for a full address tag first
QString fullAddress =
- AddressTagKeys::getInstance().getAddressTagValue(element.getTags(), "full_address");
+ _addressNormalizer.getAddressTagKeys()->getAddressTagValue(element.getTags(), "full_address");
LOG_VART(fullAddress);
if (fullAddress.isEmpty())
{
- // if we don't have the full address, let's try to get an address from parts
+ // If we don't have the full address, let's try to get an address from parts.
parsedAddresses = _parseAddressFromComponents(element.getTags(), houseNum, street);
}
else
@@ -469,12 +541,13 @@ QSet<QString> AddressParser::_parseAddresses(const Element& element, QString& ho
if (parsedAddresses.isEmpty())
{
- // we didn't find anything in the standard address tags, so let's try feature names and any user
- // defined tags
+ // We didn't find anything in the standard address tags, so let's try feature names and any user
+ // defined tags.
const QString parsedAddress = _parseAddressFromAltTags(element.getTags(), houseNum, street);
if (!parsedAddress.isEmpty())
{
parsedAddresses.insert(parsedAddress);
+ _parsedFromAddressTag = false;
}
}