Skip to content

v0.2.54..v0.2.55 changeset ConflateCmd.cpp

Garret Voltz edited this page Aug 14, 2020 · 1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp b/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp
index 585ad23..5a99135 100644
--- a/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp
+++ b/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp
@@ -54,7 +54,7 @@
 #include <hoot/core/util/StringUtils.h>
 #include <hoot/core/visitors/CountUniqueReviewsVisitor.h>
 #include <hoot/core/util/ConfigUtils.h>
-#include <hoot/core/elements/OsmUtils.h>
+#include <hoot/core/elements/VersionUtils.h>
 #include <hoot/core/ops/RemoveRoundabouts.h>
 #include <hoot/core/ops/ReplaceRoundabouts.h>
 #include <hoot/core/criterion/PointCriterion.h>
@@ -62,7 +62,10 @@
 #include <hoot/core/criterion/PolygonCriterion.h>
 #include <hoot/core/conflate/matching/MatchFactory.h>
 #include <hoot/core/ops/MapCleaner.h>
-
+#include <hoot/core/io/ChangesetStatsFormat.h>
+#include <hoot/core/util/FileUtils.h>
+#include <hoot/core/conflate/SuperfluousConflateOpRemover.h>
+#include <hoot/core/util/MemoryUsageChecker.h>
 
 // Standard
 #include <fstream>
@@ -108,27 +111,22 @@ int ConflateCmd::runSimple(QStringList& args)
 
   BoundedCommand::runSimple(args);
 
+  // This parsing is for map stats. Changeset stats args for differential are processed further down
+  // below.
   QList<SingleStat> stats;
   bool displayStats = false;
-  //force stats to always be the last optional param so it can be followed by an optional
-  //output file
   QString outputStatsFile;
   if (args.contains("--stats"))
   {
-    if (args.endsWith("--stats"))
-    {
-      displayStats = true;
-      //remove "--stats" from args list
-      args.pop_back();
-    }
-    else if (args[args.size() - 2] == "--stats")
+    displayStats = true;
+    const int statsIndex = args.indexOf("--stats");
+    if (statsIndex != -1 && statsIndex != (args.size() - 1) &&
+        args[statsIndex + 1].toLower().endsWith(".json"))
     {
-      displayStats = true;
-      outputStatsFile = args[args.size() - 1];
-      //remove "--stats" and stats output file name from args list
-      args.pop_back();
-      args.pop_back();
+      outputStatsFile = args[statsIndex + 1];
+      args.removeAll(outputStatsFile);
     }
+    args.removeAll("--stats");
   }
   LOG_VARD(displayStats);
   LOG_VARD(outputStatsFile);
@@ -154,15 +152,63 @@ int ConflateCmd::runSimple(QStringList& args)
     }
   }
   LOG_VARD(isDiffConflate);
+  LOG_VARD(diffConflator.conflatingTags());
 
   // Check for separate output files (for geometry & tags)
   bool separateOutput = false;
   if (args.contains("--separate-output"))
   {
+    const QString errorMsg =
+      QString("--separate-output is only valid when combiend with the --differential and ") +
+      QString("--include-tags options.");
+    if (!isDiffConflate || !diffConflator.conflatingTags())
+    {
+      throw IllegalArgumentException(errorMsg);
+    }
     separateOutput = true;
     args.removeAt(args.indexOf("--separate-output"));
   }
+  LOG_VARD(separateOutput);
 
+  bool displayChangesetStats = false;
+  QString outputChangesetStatsFile;
+  if (args.contains("--changeset-stats"))
+  {
+    if (!isDiffConflate)
+    {
+      throw IllegalArgumentException(
+        "--changeset-stats is only valid when combined with the --differential option.");
+    }
+    else
+    {
+      displayChangesetStats = true;
+      const int statsIndex = args.indexOf("--changeset-stats");
+      LOG_VARD(statsIndex);
+      // If the input immediately after the changeset stats arg isn't a valid changeset stats file
+      // output format, we'll just silently skip it and assume we're outputting stats to the display
+      // only. This mimics how the map stats args and stats args in other commands are parsed. We
+      // may want to eventually return an error or warning here instead.
+      if (statsIndex != -1 && statsIndex != (args.size() - 1) &&
+          !args[statsIndex + 1].startsWith("--"))
+      {
+        outputChangesetStatsFile = args[statsIndex + 1];
+        QFileInfo changesetStatsInfo(outputChangesetStatsFile);
+        if (!ChangesetStatsFormat::isValidFileOutputFormat(changesetStatsInfo.completeSuffix()))
+        {
+          outputChangesetStatsFile = "";
+        }
+        else
+        {
+          args.removeAll(outputChangesetStatsFile);
+        }
+      }
+      args.removeAll("--changeset-stats");
+    }
+  }
+  LOG_VARD(displayChangesetStats);
+  LOG_VARD(outputChangesetStatsFile);
+
+  LOG_VARD(args.size() );
   if (args.size() < 3 || args.size() > 4)
   {
     cout << getHelp() << endl << endl;
@@ -177,12 +223,11 @@ int ConflateCmd::runSimple(QStringList& args)
   const QString input2 = args[1];
   QString output = args[2];
 
-  QFileInfo outputInfo(output);
-  LOG_VARD(outputInfo.dir().absolutePath());
-  const bool outputDirSuccess = QDir().mkpath(outputInfo.dir().absolutePath());
-  if (!outputDirSuccess)
+  if (!IoUtils::isUrl(output))
   {
-    throw IllegalArgumentException("Unable to create output path for: " + output);
+    // write the output dir now so we don't get a nasty surprise at the end of a long job that it
+    // can't be written
+    IoUtils::writeOutputDir(output);
   }
 
   QString osmApiDbUrl;
@@ -194,13 +239,22 @@ int ConflateCmd::runSimple(QStringList& args)
       throw IllegalArgumentException(
         QString("%1 with SQL changeset output takes four parameters.").arg(getName()));
     }
+    else if (displayChangesetStats)
+    {
+      throw IllegalArgumentException(
+        QString("Changeset statistics (--changeset-stats) may only be calculated with an XML ") +
+        QString("changeset output (.osc)."));
+    }
     osmApiDbUrl = args[3];
   }
   else if (args.size() > 3)
   {
     std::cout << getHelp() << std::endl << std::endl;
     throw IllegalArgumentException(
-      QString("%1 with output: " + output + " takes three parameters.").arg(getName()));
+      QString("%1 with output: " + output + " takes three parameters. You provided %2: %3")
+        .arg(getName())
+        .arg(args.size())
+        .arg(args.join(",")));
   }
 
   Progress progress(ConfigOptions().getJobId(), JOB_SOURCE, Progress::JobState::Running);
@@ -243,7 +297,7 @@ int ConflateCmd::runSimple(QStringList& args)
   {
     // Let's see if we can remove any ops in the configuration that will have no effect on the
     // feature types we're conflating in order to improve runtime performance.
-    _removeSuperfluousOps();
+    SuperfluousConflateOpRemover::removeSuperfluousOps();
   }
 
   // The number of steps here must be updated as you add/remove job steps in the logic.
@@ -309,12 +363,13 @@ int ConflateCmd::runSimple(QStringList& args)
     {
       if (output.endsWith(".osc") || output.endsWith(".osc.sql"))
       {
-        OsmUtils::checkVersionLessThanOneCountAndLogWarning(map);
+        VersionUtils::checkVersionLessThanOneCountAndLogWarning(map);
       }
 
       // Store original IDs for tag diff
       progress.set(
-        _getJobPercentComplete(currentTask - 1), "Storing original features for tag differential...");
+        _getJobPercentComplete(currentTask - 1),
+       "Storing original features for tag differential...");
       diffConflator.storeOriginalMap(map);
       diffConflator.markInputElements(map);
       currentTask++;
@@ -327,7 +382,9 @@ int ConflateCmd::runSimple(QStringList& args)
       map, input2, ConfigOptions().getConflateUseDataSourceIds2(), Status::Unknown2);
     currentTask++;
   }
-  LOG_STATUS("Conflating map with " << StringUtils::formatLargeNumber(map->size()) << " elements...");
+  MemoryUsageChecker::getInstance().check();
+  LOG_STATUS(
+    "Conflating map with " << StringUtils::formatLargeNumber(map->size()) << " elements...");
 
   double inputBytes = IoSingleStat(IoSingleStat::RChar).value - bytesRead;
   LOG_VART(inputBytes);
@@ -358,6 +415,7 @@ int ConflateCmd::runSimple(QStringList& args)
     stats.append(SingleStat("Calculate Stats for Input 2 Time (sec)", t.getElapsedAndRestart()));
     currentTask++;
   }
+  MemoryUsageChecker::getInstance().check();
 
   size_t initialElementCount = map->getElementCount();
   stats.append(SingleStat("Initial Element Count", initialElementCount));
@@ -450,7 +508,23 @@ int ConflateCmd::runSimple(QStringList& args)
     "Writing conflated output: ..." + output.right(maxFilePrintLength) + "...");
   if (isDiffConflate && (output.endsWith(".osc") || output.endsWith(".osc.sql")))
   {
-    diffConflator.writeChangeset(result, output, separateOutput, osmApiDbUrl);
+    // Get the changeset stats output format from the changeset stats file extension, or if no
+    // extension is there assume a text table output to the display.
+    ChangesetStatsFormat statsFormat;
+    if (displayChangesetStats)
+    {
+      if (!outputChangesetStatsFile.isEmpty())
+      {
+        QFileInfo changesetStatsFileInfo(outputChangesetStatsFile);
+        statsFormat.setFormat(
+          ChangesetStatsFormat::fromString(changesetStatsFileInfo.completeSuffix()));
+      }
+      else
+      {
+        statsFormat.setFormat(ChangesetStatsFormat::Text);
+      }
+    }
+    diffConflator.writeChangeset(result, output, separateOutput, statsFormat, osmApiDbUrl);
   }
   else
   {
@@ -536,6 +610,39 @@ int ConflateCmd::runSimple(QStringList& args)
     }
   }
 
+  if (displayChangesetStats)
+  {
+    if (outputChangesetStatsFile.isEmpty())
+    {
+      // output to display
+      LOG_STATUS("Changeset Geometry Stats:\n" << diffConflator.getGeometryChangesetStats());
+      if (diffConflator.conflatingTags())
+      {
+        LOG_STATUS("\nChangeset Tag Stats:\n" << diffConflator.getTagChangesetStats() << "\n");
+      }
+    }
+    else
+    {
+      // output to file
+      if (separateOutput)
+      {
+        // output separate files for geometry and tag change stats
+        FileUtils::writeFully(outputChangesetStatsFile, diffConflator.getGeometryChangesetStats());
+        if (diffConflator.conflatingTags())
+        {
+          QString tagsOutFile = outputChangesetStatsFile.replace(".json", "");
+          tagsOutFile.append(".tags.json");
+          FileUtils::writeFully(tagsOutFile, diffConflator.getTagChangesetStats());
+        }
+      }
+      else
+      {
+        // output a single stats file with both geometry and tags change stats
+        FileUtils::writeFully(outputChangesetStatsFile, diffConflator.getUnifiedChangesetStats());
+      }
+    }
+  }
+
   progress.set(
     1.0, Progress::JobState::Successful,
     "Conflation job completed in " +
@@ -557,206 +664,6 @@ float ConflateCmd::_getJobPercentComplete(const int currentTaskNum) const
   return (float)currentTaskNum / (float)_numTotalTasks;
 }
 
-void ConflateCmd::_removeSuperfluousOps()
-{
-  // get all crits involved in the current matcher configuration
-  const QSet<QString> matcherCrits = _getMatchCreatorCrits();
-
-  QSet<QString> removedOps;
-
-  // for each of the conflate pre/post and map cleaner transforms (if conflate pre/post specifies
-  // MapCleaner) filter out any that aren't associated with the same ElementCriterion as the ones
-  // associated with the matchers
-
-  const QStringList modifiedPreConflateOps =
-    _filterOutUnneededOps(
-      matcherCrits, ConfigOptions().getConflatePreOps(), removedOps);
-  if (modifiedPreConflateOps.size() != ConfigOptions().getConflatePreOps().size())
-  {
-    conf().set(ConfigOptions::getConflatePreOpsKey(), modifiedPreConflateOps);
-  }
-
-  const QStringList modifiedPostConflateOps =
-    _filterOutUnneededOps(
-      matcherCrits, ConfigOptions().getConflatePostOps(), removedOps);
-  if (modifiedPostConflateOps.size() != ConfigOptions().getConflatePostOps().size())
-  {
-    conf().set(ConfigOptions::getConflatePostOpsKey(), modifiedPostConflateOps);
-  }
-
-  const QString mapCleanerName = QString::fromStdString(MapCleaner::className());
-  if (modifiedPreConflateOps.contains(mapCleanerName) ||
-      modifiedPostConflateOps.contains(mapCleanerName))
-  {
-    const QStringList modifiedCleaningOps =
-      _filterOutUnneededOps(
-        matcherCrits, ConfigOptions().getMapCleanerTransforms(), removedOps);
-    if (modifiedCleaningOps.size() != ConfigOptions().getMapCleanerTransforms().size())
-    {
-      conf().set(ConfigOptions::getMapCleanerTransformsKey(), modifiedCleaningOps);
-    }
-  }
-
-  if (removedOps.size() > 0)
-  {
-    QStringList removedOpsList = removedOps.values();
-    qSort(removedOpsList);
-    LOG_INFO(
-      "Removed the following conflate pre/post operations with no relevance to the selected " <<
-      "matchers: " << removedOpsList.join(", "));
-  }
-}
-
-QStringList ConflateCmd::_filterOutUnneededOps(
-  const QSet<QString>& matcherCrits, const QStringList& ops, QSet<QString>& removedOps)
-{
-  LOG_TRACE("ops before: " << ops);
-
-  QStringList modifiedOps;
-
-  for (int i = 0; i < ops.size(); i++)
-  {
-    const QString opName = ops.at(i);
-    LOG_VART(opName);
-
-    // MapCleaner's ops are configured with map.cleaner.transforms, so don't exclude it here.
-    if (opName == QString::fromStdString(MapCleaner::className()))
-    {
-      modifiedOps.append(opName);
-      continue;
-    }
-
-    // All the ops should be map ops or element vis and, thus, support FilteredByCriteria, but
-    // we'll check anyway to be safe.
-    std::shared_ptr<FilteredByCriteria> op;
-    if (Factory::getInstance().hasBase<OsmMapOperation>(opName.toStdString()))
-    {
-      op =
-        std::dynamic_pointer_cast<FilteredByCriteria>(
-          std::shared_ptr<OsmMapOperation>(
-            Factory::getInstance().constructObject<OsmMapOperation>(opName)));
-    }
-    else if (Factory::getInstance().hasBase<ElementVisitor>(opName.toStdString()))
-    {
-      op =
-        std::dynamic_pointer_cast<FilteredByCriteria>(
-          std::shared_ptr<ElementVisitor>(
-            Factory::getInstance().constructObject<ElementVisitor>(opName)));
-    }
-
-    if (op)
-    {
-      // get all the class names of the crits that this op is associated with
-      const QStringList opCrits = op->getCriteria();
-      LOG_VART(opCrits);
-
-      // If the op is not associated with any crit, we assume it should never be disabled based on
-      // the feature type being conflated.
-      if (opCrits.isEmpty())
-      {
-        modifiedOps.append(opName);
-        continue;
-      }
-
-      // If any of the op's crits match with those in the matchers' list, we'll use it. Otherwise,
-      // we disable it.
-      bool opAdded = false;
-      for (int j = 0; j < opCrits.size(); j++)
-      {
-        const QString opCrit = opCrits.at(j);
-        if (matcherCrits.contains(opCrit))
-        {
-          modifiedOps.append(opName);
-          opAdded = true;
-          break;
-        }
-      }
-      if (!opAdded)
-      {
-        removedOps.insert(opName);
-      }
-    }
-    else
-    {
-      removedOps.insert(opName);
-    }
-  }
-
-  LOG_TRACE("ops after: " << modifiedOps);
-  LOG_VART(removedOps);
-  return modifiedOps;
-}
-
-QSet<QString> ConflateCmd::_getMatchCreatorCrits()
-{
-  QSet<QString> matcherCrits;
-
-  // get all of the matchers from our current config
-  std::vector<std::shared_ptr<MatchCreator>> matchCreators =
-      MatchFactory::getInstance().getCreators();
-  for (std::vector<std::shared_ptr<MatchCreator>>::const_iterator it = matchCreators.begin();
-       it != matchCreators.end(); ++it)
-  {
-    std::shared_ptr<MatchCreator> matchCreator = *it;
-    std::shared_ptr<FilteredByCriteria> critFilter =
-      std::dynamic_pointer_cast<FilteredByCriteria>(matchCreator);
-    const QStringList crits = critFilter->getCriteria();
-
-    // Technically, not sure we'd have to error out here, but it will be good to know if any
-    // matchers weren't configured with crits to keep conflate bugs from sneaking in over time.
-    if (crits.size() == 0)
-    {
-      throw HootException(
-        "Match creator: " + matchCreator->getName() +
-        " does not specify any associated feature type criteria.");
-    }
-
-    for (int i = 0; i < crits.size(); i++)
-    {
-      const QString critStr = crits.at(i);
-      LOG_VART(critStr);
-      // doublecheck this is a valid crit
-      if (Factory::getInstance().hasBase<ElementCriterion>(critStr.toStdString()))
-      {
-        // add the crit
-        matcherCrits.insert(critStr);
-
-        // also add any generic geometry crits the crit inherits from
-
-        const QStringList pointCrits =
-          GeometryTypeCriterion::getCriterionClassNamesByGeometryType(
-            GeometryTypeCriterion::GeometryType::Point);
-        LOG_VART(pointCrits);
-        if (pointCrits.contains(critStr))
-        {
-          matcherCrits.insert(QString::fromStdString(PointCriterion::className()));
-        }
-
-        const QStringList lineCrits =
-          GeometryTypeCriterion::getCriterionClassNamesByGeometryType(
-            GeometryTypeCriterion::GeometryType::Line);
-        LOG_VART(lineCrits);
-        if (lineCrits.contains(critStr))
-        {
-          matcherCrits.insert(QString::fromStdString(LinearCriterion::className()));
-        }
-
-        const QStringList polyCrits =
-          GeometryTypeCriterion::getCriterionClassNamesByGeometryType(
-            GeometryTypeCriterion::GeometryType::Polygon);
-        LOG_VART(polyCrits);
-        if (polyCrits.contains(critStr))
-        {
-          matcherCrits.insert(QString::fromStdString(PolygonCriterion::className()));
-        }
-      }
-    }
-  }
-
-  LOG_VART(matcherCrits);
-  return matcherCrits;
-}
-
 void ConflateCmd::_disableRoundaboutRemoval()
 {
   // This applies to both Attribute and Differential Conflation.
Clone this wiki locally