Skip to content

v0.2.53..v0.2.54 changeset ScriptMatchCreator.cpp

Garret Voltz edited this page Mar 31, 2020 · 1 revision
diff --git a/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp b/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp
index 2ba58b4..60849a2 100644
--- a/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp
+++ b/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp
@@ -90,18 +90,18 @@ public:
     _mt(mt),
     _script(script),
     _filter(filter),
-    _customSearchRadius(-1.0)
+    _customSearchRadius(-1.0),
+    _neighborCountMax(-1),
+    _neighborCountSum(0),
+    _elementsEvaluated(0),
+    _maxGroupSize(0),
+    _numElementsVisited(0),
+    _numMatchCandidatesVisited(0),
+    _taskStatusUpdateInterval(ConfigOptions().getTaskStatusUpdateInterval()),
+    _totalElementsToProcess(0)
   {
-    _neighborCountMax = -1;
-    _neighborCountSum = 0;
-    _elementsEvaluated = 0;
-    _maxGroupSize = 0;
-    _numElementsVisited = 0;
-    _numMatchCandidatesVisited = 0;
-    _taskStatusUpdateInterval = ConfigOptions().getTaskStatusUpdateInterval();
-
     // Calls to script functions/var are expensive, both memory-wise and processing-wise. Since this
-    // constructor gets called repeatedly by createMatch, keep them out of this constructor.
+    // constructor gets called repeatedly by createMatch, keep those calls out of this constructor.
 
     // Point/Polygon is not meant to conflate any polygons that are conflatable by other conflation
     // routines, hence the use of NonConflatableCriterion.
@@ -109,6 +109,8 @@ public:
       new ChainCriterion(
         ElementCriterionPtr(new PolygonCriterion()),
         ElementCriterionPtr(new NonConflatableCriterion(map))));
+
+    _timer.start();
   }
 
   ~ScriptMatchVisitor()
@@ -147,6 +149,7 @@ public:
   }
 
   virtual QString getDescription() const { return ""; }
+  virtual std::string getClassName() const { return ""; }
 
   void checkForMatch(const std::shared_ptr<const Element>& e)
   {
@@ -382,7 +385,7 @@ public:
   {
     if (!_index)
     {
-      LOG_INFO("Creating script feature index for: " << _scriptPath << "...");
+      LOG_STATUS("Creating script feature index for: " << _scriptPath << "...");
 
       // No tuning was done, I just copied these settings from OsmMapIndex.
       // 10 children - 368 - see #3054
@@ -398,6 +401,7 @@ public:
       // Point/Polygon conflation behaves diferently than all other generic scripts in that it
       // conflates geometries of different types. This class wasn't really originally designed to
       // handle that, so we add a logic path here to accommodate Point/Polygon.
+      long numElementsIndexed = 0;
       if (!_scriptPath.contains(ScriptMatchCreator::POINT_POLYGON_SCRIPT_NAME))
       {
         std::function<bool (ConstElementPtr)> f =
@@ -428,6 +432,7 @@ public:
             break;
         }
         v.finalizeIndex();
+        numElementsIndexed = v.getSize();
       }
       else
       {
@@ -440,10 +445,12 @@ public:
         getMap()->visitWaysRo(v);
         getMap()->visitRelationsRo(v);
         v.finalizeIndex();
+        numElementsIndexed = v.getSize();
       }
-      LOG_VART(_indexToEid.size());
 
-      LOG_DEBUG("Script feature index created for: " << _scriptPath << ".");
+      LOG_STATUS(
+        "Script feature index created for: " << _scriptPath << " with " <<
+        StringUtils::formatLargeNumber(numElementsIndexed) << " elements.");
     }
     return _index;
   }
@@ -577,12 +584,24 @@ public:
           " total elements.");
       }
     }
+
+    // if matching gets slow, throttle the log update interval accordingly.
+    if (_timer.elapsed() > 3000 && _taskStatusUpdateInterval >= 10)
+    {
+      _taskStatusUpdateInterval /= 10;
+    }
+    else if (_timer.elapsed() < 250 && _taskStatusUpdateInterval < 10000)
+    {
+      _taskStatusUpdateInterval *= 10;
+    }
+
     _numElementsVisited++;
-    if (_numElementsVisited % (_taskStatusUpdateInterval * 100) == 0)
+    if (_numElementsVisited % _taskStatusUpdateInterval == 0)
     {
       PROGRESS_INFO(
         "Processed " << StringUtils::formatLargeNumber(_numElementsVisited) << " / " <<
-        StringUtils::formatLargeNumber(getMap()->getElementCount()) << " elements.");
+        StringUtils::formatLargeNumber(_totalElementsToProcess) << " elements.");
+       _timer.restart();
     }
   }
 
@@ -596,7 +615,27 @@ public:
   void setCandidateDistanceSigma(double sigma) { _candidateDistanceSigma = sigma; }
 
   CreatorDescription getCreatorDescription() const { return _scriptInfo; }
-  void setCreatorDescription(const CreatorDescription& description) { _scriptInfo = description; }
+  void setCreatorDescription(const CreatorDescription& description)
+  {
+    _scriptInfo = description;
+
+    switch (_scriptInfo.geometryType)
+    {
+      case GeometryTypeCriterion::GeometryType::Point:
+        _totalElementsToProcess = getMap()->getNodeCount();
+        break;
+      case GeometryTypeCriterion::GeometryType::Line:
+        _totalElementsToProcess = getMap()->getWayCount() + getMap()->getRelationCount();
+        break;
+      case GeometryTypeCriterion::GeometryType::Polygon:
+        _totalElementsToProcess = getMap()->getWayCount() + getMap()->getRelationCount();
+        break;
+      default:
+        // visit all geometry types if the script didn't identify its geometry
+        _totalElementsToProcess = getMap()->size();
+        break;
+    }
+  }
 
   long getNumMatchCandidatesFound() const { return _numMatchCandidatesVisited; }
 
@@ -607,17 +646,37 @@ private:
   // don't hold on to the map.
   std::weak_ptr<const OsmMap> _map;
   Persistent<Object> _mapJs;
+
   vector<ConstMatchPtr>& _result;
-  set<ElementId> _empty;
+  ConstMatchThresholdPtr _mt;
+
+  std::shared_ptr<PluginContext> _script;
+
+  ElementCriterionPtr _filter;
+
+  //used for automatic search radius calculation; it is expected that this is set from the
+  //Javascript rules file used for the generic conflation
+  double _customSearchRadius;
+
   int _neighborCountMax;
   int _neighborCountSum;
-  long _elementCount;
   int _elementsEvaluated;
   size_t _maxGroupSize;
-  ConstMatchThresholdPtr _mt;
-  std::shared_ptr<PluginContext> _script;
+  long _numElementsVisited;
+  long _numMatchCandidatesVisited;
+
+  int _taskStatusUpdateInterval;
+
+  long _totalElementsToProcess;
+
+  std::shared_ptr<ChainCriterion> _pointPolyCrit;
+
+  QElapsedTimer _timer;
+
+  long _elementCount;
+
   CreatorDescription _scriptInfo;
-  ElementCriterionPtr _filter;
+
   Persistent<Function> _getSearchRadius;
 
   QHash<ElementId, bool> _matchCandidateCache;
@@ -629,16 +688,10 @@ private:
   deque<ElementId> _indexToEid;
 
   double _candidateDistanceSigma;
-  //used for automatic search radius calculation; it is expected that this is set from the
-  //Javascript rules file used for the generic conflation
-  double _customSearchRadius;
-  QString _scriptPath;
 
-  long _numElementsVisited;
-  long _numMatchCandidatesVisited;
-  int _taskStatusUpdateInterval;
+  QString _scriptPath;
 
-  std::shared_ptr<ChainCriterion> _pointPolyCrit;
+  set<ElementId> _empty;
 };
 
 ScriptMatchCreator::ScriptMatchCreator()
@@ -677,6 +730,7 @@ void ScriptMatchCreator::setArguments(QStringList args)
   //bit of a hack...see MatchCreator.h...need to refactor
   _description = QString::fromStdString(className()) + "," + args[0];
   _cachedScriptVisitor.reset();
+  _scriptInfo = _getScriptDescription(_scriptPath);
 
   LOG_DEBUG(
     "Set arguments for: " << className() << " - rules: " << QFileInfo(_scriptPath).fileName());
@@ -756,9 +810,8 @@ void ScriptMatchCreator::createMatches(
 
   ScriptMatchVisitor v(map, matches, threshold, _script, _filter);
   v.setScriptPath(_scriptPath);
-  const CreatorDescription scriptInfo = _getScriptDescription(_scriptPath);
-  _descriptionCache[_scriptPath] = scriptInfo;
-  v.setCreatorDescription(scriptInfo);
+  _descriptionCache[_scriptPath] = _scriptInfo;
+  v.setCreatorDescription(_scriptInfo);
   v.initSearchRadiusInfo();
   v.calculateSearchRadius();
 
@@ -781,16 +834,15 @@ void ScriptMatchCreator::createMatches(
       "within a search radius of " + QString::number(searchRadius, 'g', 2) + " meters";
   }
   LOG_STATUS(
-    "Looking for matches with: " << className() << ";" << scriptFileInfo.fileName() << " " <<
-     searchRadiusStr << "...");
+    "Looking for matches with: " << scriptFileInfo.fileName() << " " << searchRadiusStr << "...");
   LOG_VARD(*threshold);
   const int matchesSizeBefore = matches.size();
 
   _cachedCustomSearchRadii[_scriptPath] = searchRadius;
   _candidateDistanceSigmaCache[_scriptPath] = v.getCandidateDistanceSigma();
 
-  LOG_VARD(GeometryTypeCriterion::typeToString(scriptInfo.geometryType));
-  switch (scriptInfo.geometryType)
+  LOG_VARD(GeometryTypeCriterion::typeToString(_scriptInfo.geometryType));
+  switch (_scriptInfo.geometryType)
   {
     case GeometryTypeCriterion::GeometryType::Point:
       map->visitNodesRo(v);
@@ -810,7 +862,7 @@ void ScriptMatchCreator::createMatches(
   }
   const int matchesSizeAfter = matches.size();
 
-  QString matchType = CreatorDescription::baseFeatureTypeToString(scriptInfo.baseFeatureType);
+  QString matchType = CreatorDescription::baseFeatureTypeToString(_scriptInfo.baseFeatureType);
   // Workaround for the Point/Polygon script since it doesn't identify a base feature type. See
   // note in ScriptMatchVisitor::getIndex and rules/PointPolygon.js.
   if (_scriptPath.contains(POINT_POLYGON_SCRIPT_NAME))
@@ -949,6 +1001,24 @@ CreatorDescription ScriptMatchCreator::_getScriptDescription(QString path) const
     Handle<Value> value = ToLocal(&plugin)->Get(geometryTypeStr);
     result.geometryType = GeometryTypeCriterion::typeFromString(toCpp<QString>(value));
   }
+  // This controls which feature types a script conflates and is required. It allows for disabling
+  // superfluous conflate ops. It should probably be integrated with isMatchCandidate somehow at
+  // some point, if possible.
+  Handle<String> matchCandidateCriterionStr =
+    String::NewFromUtf8(current, "matchCandidateCriterion");
+  if (ToLocal(&plugin)->Has(matchCandidateCriterionStr))
+  {
+    Handle<Value> value = ToLocal(&plugin)->Get(matchCandidateCriterionStr);
+    const QString valueStr = toCpp<QString>(value);
+    if (valueStr.contains(";"))
+    {
+      result.matchCandidateCriteria = valueStr.split(";");
+    }
+    else
+    {
+      result.matchCandidateCriteria = QStringList(valueStr);
+    }
+  }
 
   QFileInfo fi(path);
   result.className = (QString::fromStdString(className()) + "," + fi.fileName()).toStdString();
@@ -1011,4 +1081,9 @@ QString ScriptMatchCreator::getName() const
   return QString::fromStdString(className()) + ";" + scriptFileInfo.fileName();
 }
 
+QStringList ScriptMatchCreator::getCriteria() const
+{
+  return _scriptInfo.matchCandidateCriteria;
+}
+
 }
Clone this wiki locally