v0.2.50..v0.2.51 changeset AddressScoreExtractor.cpp
Garret Voltz edited this page Jan 15, 2020
·
1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/algorithms/extractors/AddressScoreExtractor.cpp b/hoot-core/src/main/cpp/hoot/core/algorithms/extractors/AddressScoreExtractor.cpp
new file mode 100644
index 0000000..7173b25
--- /dev/null
+++ b/hoot-core/src/main/cpp/hoot/core/algorithms/extractors/AddressScoreExtractor.cpp
@@ -0,0 +1,148 @@
+/*
+ * This file is part of Hootenanny.
+ *
+ * Hootenanny is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * --------------------------------------------------------------------
+ *
+ * The following copyright notices are generated automatically. If you
+ * have a new notice to add, please use the format:
+ * " * @copyright Copyright ..."
+ * This will properly maintain the copyright information. DigitalGlobe
+ * copyrights will be updated automatically.
+ *
+ * @copyright Copyright (C) 2016, 2017, 2018, 2019 DigitalGlobe (http://www.digitalglobe.com/)
+ */
+#include "AddressScoreExtractor.h"
+
+// hoot
+#include <hoot/core/util/Factory.h>
+#include <hoot/core/util/FileUtils.h>
+#include <hoot/core/util/Log.h>
+#include <hoot/core/util/ConfigOptions.h>
+#include <hoot/core/conflate/address/Address.h>
+#include <hoot/core/util/StringUtils.h>
+
+using namespace std;
+
+namespace hoot
+{
+
+// Factory registration of AddressScoreExtractor under the FeatureExtractor base type. The macro is
+// defined outside this file (presumably hoot/core/util/Factory.h -- confirm there for details).
+HOOT_FACTORY_REGISTER(FeatureExtractor, AddressScoreExtractor)
+
+/**
+ * Default constructor. Starts with a zeroed processed-address counter and with no match attempt
+ * recorded.
+ */
+AddressScoreExtractor::AddressScoreExtractor()
+  : _addressesProcessed(0), _matchAttemptMade(false)
+{
+}
+
+/**
+ * Configures this extractor's address parser from the supplied settings.
+ *
+ * @param conf settings wrapped in a ConfigOptions for the option reads below; also passed through
+ * to the address parser unchanged
+ */
+void AddressScoreExtractor::setConfiguration(const Settings& conf)
+{
+  ConfigOptions opts(conf);
+
+  _addressParser.setAllowLenientHouseNumberMatching(
+    opts.getAddressAllowLenientHouseNumberMatching());
+  _addressParser.setConfiguration(conf);
+
+  // The default translation is what libpostal does during normalization, and it is always done.
+  // On top of that, we provide an option to do some additional translation before normalization
+  // with a ToEnglishTranslator. Enabling the "default translation only" option overrides any use
+  // of the ToEnglishTranslator.
+  const bool translationRequested = opts.getAddressTranslateToEnglish();
+  const bool defaultTranslationOnly = opts.getAddressUseDefaultLanguageTranslationOnly();
+  _addressParser.setPreTranslateTagValuesToEnglish(
+    translationRequested && !defaultTranslationOnly, conf);
+}
+
+/**
+ * Collects the addresses for an element.
+ *
+ * The element itself is parsed first. Only if that yields nothing is a fallback tried: way nodes
+ * when the element is a way, relation members when it is a relation.
+ *
+ * @param map map passed to the way node/relation member fallback parsers
+ * @param element element to collect addresses from
+ * @param elementBeingComparedWith the other element in the comparison; only its element ID is
+ * used, and only by the fallback parsers
+ * @return the addresses found; empty if none were found
+ */
+QList<Address> AddressScoreExtractor::_getElementAddresses(
+  const OsmMap& map, const ConstElementPtr& element,
+  const ConstElementPtr& elementBeingComparedWith) const
+{
+  LOG_TRACE("Collecting addresses from: " << element->getElementId() << "...");
+  //LOG_VART(element);
+
+  // First choice: addresses found directly on the element.
+  QList<Address> directAddresses = _addressParser.parseAddresses(*element);
+  if (!directAddresses.isEmpty())
+  {
+    return directAddresses;
+  }
+
+  // Nothing on the element itself; for a way, look at its nodes instead.
+  if (element->getElementType() == ElementType::Way)
+  {
+    ConstWayPtr wayElement = std::dynamic_pointer_cast<const Way>(element);
+    return
+      _addressParser.parseAddressesFromWayNodes(
+        *wayElement, map, elementBeingComparedWith->getElementId());
+  }
+
+  // For a relation, look at its members instead.
+  if (element->getElementType() == ElementType::Relation)
+  {
+    ConstRelationPtr relationElement = std::dynamic_pointer_cast<const Relation>(element);
+    return
+      _addressParser.parseAddressesFromRelationMembers(
+        *relationElement, map, elementBeingComparedWith->getElementId());
+  }
+
+  // Any other element type has no fallback; return the (empty) direct result.
+  return directAddresses;
+}
+
+/**
+ * Scores two elements by whether they share an address.
+ *
+ * When both elements have at least one address, this records that a match attempt was made and
+ * adds both address counts to the running processed-address total. Those members are written
+ * from this const method, so they are presumably declared mutable in the header.
+ *
+ * @param map map used when collecting addresses from way nodes or relation members
+ * @param element1 first element to compare
+ * @param element2 second element to compare
+ * @return -1.0 if either element has no addresses; 1.0 if any address of element2 compares equal
+ * to any address of element1; 0.0 otherwise
+ */
+double AddressScoreExtractor::extract(const OsmMap& map, const ConstElementPtr& element1,
+                                      const ConstElementPtr& element2) const
+{
+  // Partial address matches were experimented with in the past and had no positive effect. Search
+  // the history of this class for examples, to see if it's worth experimenting with again at some
+  // point.
+
+  // see if the first element has any address
+  const QList<Address> element1Addresses = _getElementAddresses(map, element1, element2);
+  LOG_VART(element1Addresses.size());
+  if (element1Addresses.isEmpty())
+  {
+    LOG_TRACE("No element 1 addresses.");
+    return -1.0;
+  }
+
+  // see if the second element has an address
+  const QList<Address> element2Addresses = _getElementAddresses(map, element2, element1);
+  LOG_VART(element2Addresses.size());
+  if (element2Addresses.isEmpty())
+  {
+    LOG_TRACE("No element 2 addresses.");
+    return -1.0;
+  }
+
+  _matchAttemptMade = true;
+  _addressesProcessed += element2Addresses.size();
+  _addressesProcessed += element1Addresses.size();
+
+  // Check for address matches. Both lists are const, and the addresses are bound by const
+  // reference so that no Address is copied per comparison.
+  for (const Address& element2Address : element2Addresses)
+  {
+    for (const Address& element1Address : element1Addresses)
+    {
+      if (element2Address == element1Address)
+      {
+        LOG_TRACE("Found address match.");
+        return 1.0;
+      }
+    }
+  }
+
+  return 0.0;
+}
+
+}