Skip to content

v0.2.54..v0.2.55 changeset ElementDeduplicator.h

Garret Voltz edited this page Aug 14, 2020 · 1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/elements/ElementDeduplicator.h b/hoot-core/src/main/cpp/hoot/core/elements/ElementDeduplicator.h
new file mode 100644
index 0000000..f9cc1f6
--- /dev/null
+++ b/hoot-core/src/main/cpp/hoot/core/elements/ElementDeduplicator.h
@@ -0,0 +1,159 @@
+/*
+ * This file is part of Hootenanny.
+ *
+ * Hootenanny is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * --------------------------------------------------------------------
+ *
+ * The following copyright notices are generated automatically. If you
+ * have a new notice to add, please use the format:
+ * " * @copyright Copyright ..."
+ * This will properly maintain the copyright information. DigitalGlobe
+ * copyrights will be updated automatically.
+ *
+ * @copyright Copyright (C) 2020 DigitalGlobe (http://www.digitalglobe.com/)
+ */
+
+#ifndef ELEMENT_DEDUPLICATOR_H
+#define ELEMENT_DEDUPLICATOR_H
+
+// Hoot
+#include <hoot/core/elements/OsmMap.h>
+#include <hoot/core/criterion/ElementCriterion.h>
+
+namespace hoot
+{
+
+/**
+ * This class de-duplicates features within a map or across multiple maps. It ignores element IDs,
+ * versions and other OSM metadata.
+ *
+ * This has a more strict definition of way duplicates than DuplicateWayRemover.
+ *
+ * @todo For now, this is being tested from tests exercising ChangesetReplacementCreator, but it
+ * would be good to give it its own test eventually.
+ * @todo it's convoluted to have both a boolean and a criterion for each element type; removal should
+ * be able to all be done with a single criterion and all elements, regardless of type, can be
+ * processed in one pass...will just take a decent amount of refactoring
+ * @todo Is there some overlap here with ElementComparer?
+ * @todo Is there any way this could replace DuplicateNodeRemover? I don't think so...
+ */
+class ElementDeduplicator
+{
+
+public:
+
+  ElementDeduplicator();
+
+  /**
+   * Removes intra-map duplicate elements from a map. Ignores element ID, version, changeset,
+   * and metadata tags.
+   *
+   * @param map the map to de-duplicate
+   */
+  void dedupe(OsmMapPtr map);
+
+  /**
+   * Removes intra-map and inter-map duplicate elements from two maps. Ignores element ID, version,
+   * changeset, and metadata tags. The first map's elements are kept when inter-map duplicate
+   * elements are found.
+   *
+   * @param map1 the first map to de-duplicate
+   * @param map2 the second map to de-duplicate
+   */
+  void dedupe(OsmMapPtr map1, OsmMapPtr map2);
+
+  void setDedupeIntraMap(bool dedupe) { _dedupeIntraMap = dedupe; }
+  void setDedupeNodes(bool dedupe) { _dedupeNodes = dedupe; }
+  void setDedupeWays(bool dedupe) { _dedupeWays = dedupe; }
+  void setDedupeRelations(bool dedupe) { _dedupeRelations = dedupe; }
+  void setNodeCriterion(ElementCriterionPtr crit) { _nodeCrit = crit; }
+  void setWayCriterion(ElementCriterionPtr crit) { _wayCrit = crit; }
+  void setRelationCriterion(ElementCriterionPtr crit) { _relationCrit = crit; }
+  void setFavorMoreConnectedWays(bool favor) { _favorMoreConnectedWays = favor; }
+
+private:
+
+  // If true and two maps are being de-duped, duplicates within each single map are also removed.
+  // This setting is ignored when de-duping one map, as intra-map de-duplication is always done
+  // in that case.
+  bool _dedupeIntraMap;
+
+  // allows for controlling the element types being de-duped
+  bool _dedupeNodes;
+  bool _dedupeWays;
+  bool _dedupeRelations;
+
+  // criterion for each element type to further restrict what gets replaced
+  ElementCriterionPtr _nodeCrit;
+  ElementCriterionPtr _wayCrit;
+  ElementCriterionPtr _relationCrit;
+
+  // If true when ways are deduped, the way sharing more nodes with other ways is kept over the one
+  // with fewer shared nodes.
+  bool _favorMoreConnectedWays;
+
+  void _validateInputs();
+
+  /*
+   * Uses ElementHashVisitor to assign unique hashes to elements and also retrieves the element
+   * IDs of any duplicates found
+   */
+  void _calcElementHashes(
+    OsmMapPtr map, QMap<QString, ElementId>& hashes,
+    QSet<std::pair<ElementId, ElementId>>& duplicates);
+
+  /*
+   * Converts pairs of duplicated features' element IDs to a collection of element IDs sorted by
+   * element type for removal purposes; the second element is arbitrarily selected for removal
+   */
+  QMap<ElementType::Type, QSet<ElementId>> _dupesToElementIds(
+    const QSet<std::pair<ElementId, ElementId>>& duplicates);
+
+  /*
+   * Similar to _dupesToElementIds, except that for ways, if _favorMoreConnectedWays is enabled,
+   * this will use way connectedness as a factor in determining which element of the pair to mark
+   * for removal
+   */
+  void _dupesToElementIdsCheckMap(
+    const QSet<std::pair<ElementId, ElementId>>& duplicates, OsmMapPtr map1, OsmMapPtr map2,
+    QMap<ElementType::Type, QSet<ElementId>>& elementsToRemove,
+    QMap<ElementId, QString>& elementIdsToRemoveFromMap);
+
+  /*
+   * Converts element hashes of duplicated features' element IDs to a collection of element IDs
+   * sorted by element type for removal purposes; for ways, if _favorMoreConnectedWays is enabled,
+   * this will use way connectedness as a factor in determining which element of the pair to mark
+   * for removal
+   */
+  void _dupeHashesToElementIdsCheckMap(
+    const QSet<QString>& sharedHashes, OsmMapPtr map1, OsmMapPtr map2,
+    const QMap<QString, ElementId>& map1Hashes, const QMap<QString, ElementId>& map2Hashes,
+    QMap<ElementType::Type, QSet<ElementId>>& elementsToRemove,
+    QMap<ElementId, QString>& elementIdsToRemoveFromMap);
+
+  void _removeElements(const QSet<ElementId>& elementsToRemove, OsmMapPtr map);
+
+  /*
+   * Similar to _removeElements, except that for ways, if _favorMoreConnectedWays is enabled, this
+   * will use way connectedness as a factor in determining which element of the pair to remove
+   */
+  void _removeWaysCheckMap(
+    const QSet<ElementId>& waysToRemove, OsmMapPtr map1, OsmMapPtr map2,
+    const QMap<ElementId, QString>& elementIdsToRemoveFromMap);
+};
+
+}
+
+#endif // ELEMENT_DEDUPLICATOR_H
Clone this wiki locally