Skip to content

v0.2.55..v0.2.56 changeset AddressParser.cpp

Garret Voltz edited this page Aug 14, 2020 · 3 revisions
diff --git a/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp b/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp
index 75bf914..ea26f4f 100644
--- a/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp
+++ b/hoot-core/src/main/cpp/hoot/core/conflate/address/AddressParser.cpp
@@ -33,6 +33,7 @@
 #include <hoot/core/algorithms/string/ExactStringDistance.h>
 #include <hoot/core/elements/OsmMap.h>
 #include <hoot/core/conflate/address/Address.h>
+#include <hoot/core/util/StringUtils.h>
 
 // libpostal
 #include <libpostal/libpostal.h>
@@ -42,7 +43,10 @@ namespace hoot
 
 AddressParser::AddressParser() :
 _allowLenientHouseNumberMatching(true),
-_preTranslateTagValuesToEnglish(false)
+_preTranslateTagValuesToEnglish(false),
+_parsedFromAddressTag(true), // reset to true by parseAddresses(); set false when alt tags supply the address
+_isHouseNumRange(false), // set true when the house number goes through the range parsing path
+_isSubLetter(false) // set true when a non-range house number passes _isSubLetterAddress()
 {
 }
 
@@ -107,9 +111,7 @@ int AddressParser::numAddressesRecursive(const ConstElementPtr& element, const O
   else if (element->getElementType() == ElementType::Way)
   {
     addresses = parseAddressesFromWayNodes(*std::dynamic_pointer_cast<const Way>(element), map);
-    // TODO: Uncommenting this blows up a few of the poi/poly regression tests by forcing some
-    // address comparisons. See #3627.
-    //addresses.append(parseAddresses(*element));
+    addresses.append(parseAddresses(*element));
   }
   else if (element->getElementType() == ElementType::Relation)
   {
@@ -127,6 +129,10 @@ int AddressParser::numAddressesRecursive(const ConstElementPtr& element, const O
 QList<Address> AddressParser::parseAddresses(const Element& element,
                                              const bool normalizeAddresses) const
 {
+  _parsedFromAddressTag = true;
+  _isHouseNumRange = false;
+  _isSubLetter = false;
+
   // Make this call here, so that we don't cause it to be done unnecessarily as part of this
   // class's init when its a mem var on another class, since this init is expensive.
   LibPostalInit::getInstance();
@@ -137,6 +143,7 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
   QString houseNum;
   QString street;
   const QSet<QString> parsedAddresses = _parseAddresses(element, houseNum, street);
+  LOG_TRACE("Parsed " << parsedAddresses.size() << " addresses for " << element.getElementId());
   LOG_VART(parsedAddresses);
 
   // add the parsed addresses to a collection in which they will later be compared to each other
@@ -146,11 +153,11 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
     QString parsedAddress = *parsedAddressItr;
     LOG_VART(parsedAddress);
 
-    //optional additional lang pre-normalization translation
+    // optional additional lang pre-normalization translation
     if (_preTranslateTagValuesToEnglish)
     {
-      //only translating the street portion of the address string...the number isn't translatable
-       assert(!street.isEmpty());
+      // only translating the street portion of the address string...the number isn't translatable
+      assert(!street.isEmpty());
       const QString preTranslatedStreet = _addressTranslator.translateToEnglish(street);
       if (!preTranslatedStreet.isEmpty())
       {
@@ -165,7 +172,7 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
 
     if (normalizeAddresses)
     {
-      //normalize and translate the address strings, so we end up comparing apples to apples
+      // normalize and translate the address strings, so we end up comparing apples to apples
       const QSet<QString> normalizedAddresses = _addressNormalizer.normalizeAddress(parsedAddress);
       LOG_VART(normalizedAddresses);
 
@@ -175,6 +182,9 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
         const QString normalizedAddress = *normalizedAddressItr;
         LOG_VART(normalizedAddress);
         Address address(normalizedAddress, _allowLenientHouseNumberMatching);
+        address.setParsedFromAddressTag(_parsedFromAddressTag);
+        address.setIsRange(_isHouseNumRange);
+        address.setIsSubLetter(_isSubLetter);
         if (!addresses.contains(address))
         {
           LOG_TRACE("Adding address: " << address << " for element: " << element.getElementId());
@@ -185,6 +195,9 @@ QList<Address> AddressParser::parseAddresses(const Element& element,
     else
     {
       Address address(parsedAddress, _allowLenientHouseNumberMatching);
+      address.setParsedFromAddressTag(_parsedFromAddressTag);
+      address.setIsRange(_isHouseNumRange);
+      address.setIsSubLetter(_isSubLetter);
       if (!addresses.contains(address))
       {
         LOG_TRACE("Adding address: " << address << " for element: " << element.getElementId());
@@ -226,7 +239,7 @@ QList<Address> AddressParser::parseAddressesFromRelationMembers(const Relation&
     ConstElementPtr member = map.getElement(relationMembers[i].getElementId());
     // If the poly contains the poi being compared to as a way node or relation member, then the
     // POI's address will be added to both the poly and poi group of addresses and yield a fake
-    // address match
+    // address match.
     if (skipElementId.isNull() || member->getElementId() != skipElementId)
     {
       if (member->getElementType() == ElementType::Node)
@@ -286,14 +299,20 @@ bool AddressParser::_isParseableAddressFromComponents(const Tags& tags, QString&
                                                       QString& street) const
 {
   // we only require a valid street address...no other higher order parts, like city, state, etc.
-  houseNum = AddressTagKeys::getInstance().getAddressTagValue(tags, "house_number");
-  street = AddressTagKeys::getInstance().getAddressTagValue(tags, "street").toLower();
+  houseNum = _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "house_number");
+  LOG_VART(houseNum);
+  street = _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "street").toLower();
+  LOG_VART(street);
   if (!houseNum.isEmpty() && !street.isEmpty())
   {
     LOG_TRACE("Found address from components: " << houseNum << ", " << street << ".");
     return true;
   }
-  return false;
+  else
+  {
+    LOG_TRACE("No parseable address present.");
+    return false;
+  }
 }
 
 bool AddressParser::_isRangeAddress(const QString& houseNum) const
@@ -301,14 +320,22 @@ bool AddressParser::_isRangeAddress(const QString& houseNum) const
   return houseNum.contains("-");
 }
 
-bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QString& street) const
+bool AddressParser::_isSubLetterAddress(const QString& houseNum) const
+{
+  return StringUtils::hasAlphabeticCharacter(houseNum); // presumably true if any letter is present - confirm in StringUtils
+}
+
+bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QString& street,
+                                       const bool requireStreetTypeInIntersection) const
 {
+  LOG_VART(address);
+
   // use libpostal to break down the address string
   libpostal_address_parser_response_t* parsed =
     libpostal_parse_address(
       address.toUtf8().data(), libpostal_get_address_parser_default_options());
   /*
-   *Label: house_number, Component: 781
+    Label: house_number, Component: 781
     Label: road, Component: franklin ave
     Label: suburb, Component: crown heights
     Label: city_district, Component: brooklyn
@@ -323,7 +350,7 @@ bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QStr
     LOG_VART(label);
     const QString component = QString::fromUtf8((const char*)parsed->components[i]);
     LOG_VART(component);
-    //we only care about the street address
+    // we only care about the street address
     if (label == "house_number")
     {
       houseNum = component;
@@ -335,12 +362,35 @@ bool AddressParser::_isValidAddressStr(QString& address, QString& houseNum, QStr
   }
   libpostal_address_parser_response_destroy(parsed);
 
-  // intersections won't have numbers; unfortunately this lets through a false positive, like:
-  // "lrv station-church street" - TODO: should this be re-enabled?
-  if (/*!houseNum.isEmpty() &&*/ !street.isEmpty())
+  LOG_VART(street);
+  LOG_VART(houseNum);
+  LOG_VART(requireStreetTypeInIntersection);
+  LOG_VART(Address::isStreetIntersectionAddress(street, requireStreetTypeInIntersection));
+  LOG_VART(Address::isStreetIntersectionAddress(address, requireStreetTypeInIntersection));
+
+  // street address with house number
+  if (!houseNum.isEmpty() && !street.isEmpty())
   {
     address = houseNum + " " + street;
     address = address.trimmed();
+    LOG_TRACE("Found address: " << address);
+    return true;
+  }
+  // intersections won't have numbers
+  else if (!street.isEmpty() &&
+           Address::isStreetIntersectionAddress(street, requireStreetTypeInIntersection))
+  {
+    address = street;
+    address = address.trimmed();
+    LOG_TRACE("Found intersection address: " << address);
+    return true;
+  }
+  // if libpostal didn't pull out a street, then let's see if the string passed in is an
+  // intersection
+  else if (Address::isStreetIntersectionAddress(address, requireStreetTypeInIntersection))
+  {
+    address = address.trimmed();
+    LOG_TRACE("Found intersection address: " << address);
     return true;
   }
   else
@@ -388,19 +438,27 @@ QSet<QString> AddressParser::_parseAddressFromComponents(const Tags& tags, QStri
     {
       // use custom logic for a address containing a range of addresses
       parsedAddresses = _parseAddressAsRange(houseNum, street);
+      _isHouseNumRange = true;
+      LOG_TRACE("Address is range address.");
     }
     else
     {
+      if (_isSubLetterAddress(houseNum))
+      {
+        _isSubLetter = true;
+        LOG_TRACE("Address is subletter address.");
+      }
+
       QString parsedAddress = houseNum + " ";
       const QString streetPrefix =
-        AddressTagKeys::getInstance().getAddressTagValue(tags, "street_prefix");
+        _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "street_prefix");
       if (!streetPrefix.isEmpty())
       {
         parsedAddress += streetPrefix + " ";
       }
       parsedAddress += street;
       const QString streetSuffix =
-        AddressTagKeys::getInstance().getAddressTagValue(tags, "street_suffix");
+        _addressNormalizer.getAddressTagKeys()->getAddressTagValue(tags, "street_suffix");
       if (!streetSuffix.isEmpty())
       {
         parsedAddress += " " + streetPrefix;
@@ -420,10 +478,9 @@ QString AddressParser::_parseAddressFromAltTags(const Tags& tags, QString& house
   LOG_TRACE("Parsing address from alt tags...");
   QString parsedAddress;
 
-  //let's always look in the name field; arguably, we could look in all of them instead of just
-  //one...
-  QSet<QString> additionalTagKeys = AddressTagKeys::getInstance().getAdditionalTagKeys();
-  additionalTagKeys = additionalTagKeys.unite(QSet<QString>::fromList(tags.getNameKeys()));
+  // let's always look in the name field; arguably, we could look in all of them instead of just
+  // one...
+  QSet<QString> additionalTagKeys = _addressNormalizer.getAddressTagKeys()->getAdditionalTagKeys();
   LOG_VART(additionalTagKeys);
 
   for (QSet<QString>::const_iterator tagItr = additionalTagKeys.begin();
@@ -439,6 +496,21 @@ QString AddressParser::_parseAddressFromAltTags(const Tags& tags, QString& house
     }
   }
 
+  additionalTagKeys = QSet<QString>::fromList(tags.getNameKeys());
+  LOG_VART(additionalTagKeys);
+  for (QSet<QString>::const_iterator tagItr = additionalTagKeys.begin();
+       tagItr != additionalTagKeys.end(); ++tagItr)
+  {
+    const QString tagKey = *tagItr;
+    QString tagVal = tags.get(tagKey);
+    if (!tagVal.isEmpty() && _isValidAddressStr(tagVal, houseNum, street, true))
+    {
+      parsedAddress = tagVal;
+      LOG_TRACE("Found address: " << parsedAddress << " from additional tag key: " << tagKey << ".");
+      break;
+    }
+  }
+
   return parsedAddress;
 }
 
@@ -450,11 +522,11 @@ QSet<QString> AddressParser::_parseAddresses(const Element& element, QString& ho
 
   // look for a full address tag first
   QString fullAddress =
-    AddressTagKeys::getInstance().getAddressTagValue(element.getTags(), "full_address");
+    _addressNormalizer.getAddressTagKeys()->getAddressTagValue(element.getTags(), "full_address");
   LOG_VART(fullAddress);
   if (fullAddress.isEmpty())
   {
-    // if we don't have the full address, let's try to get an address from parts
+    // If we don't have the full address, let's try to get an address from parts.
     parsedAddresses = _parseAddressFromComponents(element.getTags(), houseNum, street);
   }
   else
@@ -469,12 +541,13 @@ QSet<QString> AddressParser::_parseAddresses(const Element& element, QString& ho
 
   if (parsedAddresses.isEmpty())
   {
-    // we didn't find anything in the standard address tags, so let's try feature names and any user
-    // defined tags
+    // We didn't find anything in the standard address tags, so let's try feature names and any user
+    // defined tags.
     const QString parsedAddress = _parseAddressFromAltTags(element.getTags(), houseNum, street);
     if (!parsedAddress.isEmpty())
     {
       parsedAddresses.insert(parsedAddress);
+      _parsedFromAddressTag = false;
     }
   }
 
Clone this wiki locally