Skip to content

v0.2.54..v0.2.55 changeset CollectionRelation.js

Garret Voltz edited this page Aug 14, 2020 · 1 revision
diff --git a/rules/CollectionRelation.js b/rules/CollectionRelation.js
new file mode 100644
index 0000000..77e87e1
--- /dev/null
+++ b/rules/CollectionRelation.js
@@ -0,0 +1,272 @@
+/**
+ * This script conflates collection relations (e.g. route, administrative boundary relations, etc.).
+ * It is meant to be run after all other matchers.
+ */
+
+"use strict";
+
+exports.candidateDistanceSigma = 1.0; // 1.0 * (CE95 + Worst CE95);
+exports.description = "Matches collection relations";
+
+// This matcher only sets match/miss/review values to 1.0, therefore the score thresholds aren't
+// used. If that ever changes, then the generic score threshold configuration options used below
+// should be replaced with custom score threshold configuration options.
+exports.matchThreshold = parseFloat(hoot.get("conflate.match.threshold.default"));
+exports.missThreshold = parseFloat(hoot.get("conflate.miss.threshold.default"));
+exports.reviewThreshold = parseFloat(hoot.get("conflate.review.threshold.default"));
+// Search radius and tag/name thresholds, read from the hoot configuration and parsed as floats.
+exports.searchRadius = parseFloat(hoot.get("search.radius.collection.relation"));
+exports.tagThreshold = parseFloat(hoot.get("collection.relation.tag.threshold"));
+exports.nameThreshold = parseFloat(hoot.get("collection.relation.name.threshold"));
+exports.experimental = false;
+exports.baseFeatureType = "CollectionRelation";
+exports.writeMatchedBy = hoot.get("writer.include.matched.by.tag"); // raw value; mergePair compares it to "true"
+// TODO: should this be line?
+exports.geometryType = "polygon";
+
+// This is needed for disabling superfluous conflate ops. In the future, it may also
+// be used to replace exports.isMatchCandidate (see #3047).
+exports.matchCandidateCriterion = "hoot::CollectionRelationCriterion";
+// Feature extractors shared by the mismatch helper functions in this file.
+var edgeDistanceExtractor = new hoot.EdgeDistanceExtractor();
+var angleHistExtractor = new hoot.AngleHistogramExtractor();
+// We may eventually want to try something other than the default name extractor here.
+var nameExtractor = new hoot.NameExtractor();
+// Only used by memberSimilarityMismatch, which nothing in this file calls.
+var memberSimilarityExtractor = new hoot.RelationMemberSimilarityExtractor();
+
+/**
+ * Returns true if e is a candidate for a match, which here means isCollectionRelation(e)
+ * returns true; map is unused. Implementing this method is optional, but may dramatically
+ * increase speed by culling features early (e.g. no need to check nodes for a polygon match).
+ */
+exports.isMatchCandidate = function(map, e)
+{
+  return isCollectionRelation(e);
+};
+
+/**
+ * Always returns true for this matcher. If this function returns true then all
+ * overlapping matches will be treated as a group. For now that means if two matches
+ * overlap then the whole group will be marked as needing review.
+ *
+ * If this function returns false the conflation routines will attempt to
+ * pick the best subset of matches that do not conflict.
+ */
+exports.isWholeGroup = function()
+{
+  return true;
+};
+// Returns true if any type check below fails for e1 vs. e2 (see the trace messages), else false.
+function typeMismatch(e1, e2)
+{
+  var tags1 = e1.getTags();
+  var tags2 = e2.getTags();
+  var type1 = e1.getType();
+  var type2 = e2.getType();
+
+  // If the collection relations aren't filtered out properly by type beforehand the
+  // geometry checks afterward can become very expensive.
+  // Past the first check the two types are equal, so the later branches only test type1.
+  if (type1 != type2)
+  {    
+    hoot.trace("type mismatch; type1: " + type1 + "; type2: " + type2);
+    hoot.trace("mostSpecificType 1: " + mostSpecificType(e1));
+    hoot.trace("mostSpecificType 2: " + mostSpecificType(e2));
+    return true;
+  }
+  else if (type1 == "boundary" && tags1.get("boundary") == "administrative" && tags2.get("boundary") == "administrative" && tags1.get("admin_level") != tags2.get("admin_level"))
+  {
+    hoot.trace("admin_level mismatch");
+    return true;
+  }
+  else if ((type1 == "multipolygon" || type1 == "multilineString") && explicitTypeMismatch(e1, e2, exports.tagThreshold))
+  { // NOTE(review): "multilineString" has a capital S; confirm it matches the stored type value.
+    hoot.trace("multipoly/multilinestring type mismatch");
+    return true;
+  }
+  else if (type1 == "restriction" && tags1.get("restriction") != tags2.get("restriction"))
+  {
+    hoot.trace("restriction mismatch");
+    return true;
+  }
+  else if (type1 == "route" && tags1.get("route") != tags2.get("route"))
+  {
+    hoot.trace("route mismatch");
+    return true;
+  }
+
+  return false;
+}
+// Returns true if the name score for e1 and e2 falls below exports.nameThreshold, else false.
+function nameMismatch(map, e1, e2)
+{
+  var tags1 = e1.getTags();
+  var tags2 = e2.getTags();
+  var type1 = e1.getType();
+  var type2 = e2.getType(); // not read anywhere in this function
+  // The score starts at 1.0 and is only changed by one of the two branches below.
+  var nameScore = 1.0;
+  // Road routes (both tagged route=road) are compared on ref equality instead of by name.
+  // This may end up being too restrictive...
+  if (type1 == "route" && tags1.get("route") == "road" && tags2.get("route") == "road")
+  {
+    var ref1 = tags1.get("ref");
+    var ref2 = tags2.get("ref");
+    if (ref1 != ref2)
+    {
+      hoot.trace("highway ref mismatch; ref1: " + ref1 + ", ref2: " + ref2);
+      nameScore = 0.0;
+    }
+  }
+  // only score the name if both have one
+  else if (bothElementsHaveName(e1, e2))
+  {
+    nameScore = nameExtractor.extract(map, e1, e2);
+  }
+
+  if (nameScore < exports.nameThreshold)
+  {
+    hoot.trace("name mismatch; score: " + nameScore);
+    return true;
+  }
+
+  return false;
+}
+// Returns true if the geometry check selected by total member node count fails, else false.
+function geometryMismatch(map, e1, e2)
+{
+  // This is a little convoluted and may need further adjustment. Edge distance is pretty accurate
+  // for this but gets expensive as the relations get larger. Angle hist is a little less accurate
+  // overall but runs faster and seems to be working ok for the larger relations. For matching
+  // of disjointed relations (relations with different but connecting ways) a further check is
+  // needed (which also has the potential to be very expensive O(n^2)) and is only done for the
+  // larger relations when the geometry check fails.
+
+  // TODO: Should we be extracting sublines first and passing those to the extractors?
+  // The node counts of both relations are summed to choose which extractor to run.
+  var numRelationMemberNodes = getNumRelationMemberNodes(map, e1.getElementId()) + getNumRelationMemberNodes(map, e2.getElementId());
+  if (numRelationMemberNodes < 2000) // Threshold determined off of one dataset...may need tweaking.
+  {
+    // This can become a fairly expensive check for relations with a lot of total nodes.
+    var edgeDist = edgeDistanceExtractor.extract(map, e1, e2);
+    if (edgeDist < 0.97)
+    { 
+      hoot.trace("match failed on edgeDist: " + edgeDist);
+      return true;
+    }
+  }
+  else
+  {
+    //hoot.trace("numRelationMemberNodes: " + numRelationMemberNodes);
+    var angleHist = angleHistExtractor.extract(map, e1, e2);
+    if (angleHist < 0.73)
+    { 
+      if (relationsHaveConnectedWayMembers(map, e1.getElementId(), e2.getElementId()))
+      {
+        hoot.trace("match failed on angleHist: " + angleHist + " but there are connected ways.");
+      } // falls through to "return false": connected ways override the low angleHist score
+      else
+      {
+        hoot.trace("match failed on angleHist: " + angleHist);
+        return true;
+      }  
+    }
+  }
+  return false;
+}
+// Returns true if the member similarity score for e1 and e2 is below 0.75, else false.
+function memberSimilarityMismatch(map, e1, e2)
+{
+  // This hasn't panned out as being useful yet. This is meant to recognize relations with almost
+  // identical members. We do encounter those, but the other checks (name, geometry) usually help
+  // identify them beforehand and makes this check unnecessary.
+  // NOTE: nothing in this file calls this function.
+  var memberSim = memberSimilarityExtractor.extract(map, e1, e2);
+  if (memberSim < 0.75)
+  {
+    hoot.trace("match failed on memberSim: " + memberSim);
+    return true;
+  }
+  return false;
+}
+
+/**
+ * Returns the match score for the three class relationships: match, miss, and review.
+ * The scores should always sum to one. If they don't you will be taunted
+ * mercilessly and we'll normalize it anyway. :P
+ *
+ * This matcher returns { miss: 1.0 } when e1 and e2 have the same status string or when the
+ * type, name, or geometry check reports a mismatch (checked in that order); otherwise it
+ * returns { match: 1.0, miss: 0.0, review: 0.0 }.
+ */
+exports.matchScore = function(map, e1, e2)
+{
+  var result = { miss: 1.0 };
+  // Elements with the same status string are scored as a miss without further checks.
+  if (e1.getStatusString() == e2.getStatusString()) 
+  {
+    return result;
+  }
+
+  var tags1 = e1.getTags();
+  var tags2 = e2.getTags();
+
+  hoot.trace("**********************************");
+  hoot.trace("e1: " + e1.getId() + ", " + tags1.get("name"));
+  if (tags1.get("note"))
+  {
+    hoot.trace("e1 note: " + tags1.get("note"));
+  }
+  hoot.trace("e2: " + e2.getId() + ", " + tags2.get("name"));
+  if (tags2.get("note"))
+  {
+    hoot.trace("e2 note: " + tags2.get("note"));
+  }
+  
+  // These checks were determined with a very small sample of test data and no model was used, so
+  // may need further refinement.
+
+  if (typeMismatch(e1, e2))
+  {
+    return result;
+  }
+  if (nameMismatch(map, e1, e2))
+  {
+    return result;
+  }
+  if (geometryMismatch(map, e1, e2))
+  {
+    return result;
+  }
+
+  hoot.trace("match");
+  result = { match: 1.0, miss: 0.0, review: 0.0 };
+  return result;
+};
+
+/**
+ * Simpler version of the mergeSets function. Maybe only support this at first. It only
+ * supports merging two elements and the replaced list is determined implicitly based on the
+ * result. Calls mergeCollectionRelations, sets e1's status to "conflated", and returns e1.
+ */
+exports.mergePair = function(map, e1, e2)
+{
+  mergeCollectionRelations(map, e1.getElementId(), e2.getElementId());
+
+  e1.setStatusString("conflated");
+  if (exports.writeMatchedBy == "true")
+  {
+    // Technically, we should get this key from MetadataTags, but that's not integrated with hoot yet.
+    e1.setTag("hoot:matchedBy", exports.baseFeatureType);
+  }
+  return e1;
+};
+// Always returns an empty array; map, e1, and e2 are not used.
+exports.getMatchFeatureDetails = function(map, e1, e2)
+{
+  var featureDetails = [];
+  return featureDetails;
+};
+
+
Clone this wiki locally