v0.2.54..v0.2.55 changeset ConflateCmd.cpp
Garret Voltz edited this page Aug 14, 2020
·
1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp b/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp
index 585ad23..5a99135 100644
--- a/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp
+++ b/hoot-core/src/main/cpp/hoot/core/cmd/ConflateCmd.cpp
@@ -54,7 +54,7 @@
#include <hoot/core/util/StringUtils.h>
#include <hoot/core/visitors/CountUniqueReviewsVisitor.h>
#include <hoot/core/util/ConfigUtils.h>
-#include <hoot/core/elements/OsmUtils.h>
+#include <hoot/core/elements/VersionUtils.h>
#include <hoot/core/ops/RemoveRoundabouts.h>
#include <hoot/core/ops/ReplaceRoundabouts.h>
#include <hoot/core/criterion/PointCriterion.h>
@@ -62,7 +62,10 @@
#include <hoot/core/criterion/PolygonCriterion.h>
#include <hoot/core/conflate/matching/MatchFactory.h>
#include <hoot/core/ops/MapCleaner.h>
-
+#include <hoot/core/io/ChangesetStatsFormat.h>
+#include <hoot/core/util/FileUtils.h>
+#include <hoot/core/conflate/SuperfluousConflateOpRemover.h>
+#include <hoot/core/util/MemoryUsageChecker.h>
// Standard
#include <fstream>
@@ -108,27 +111,22 @@ int ConflateCmd::runSimple(QStringList& args)
BoundedCommand::runSimple(args);
+ // This parsing is for map stats only. Changeset stats args for differential conflation are
+ // processed further below.
QList<SingleStat> stats;
bool displayStats = false;
- //force stats to always be the last optional param so it can be followed by an optional
- //output file
QString outputStatsFile;
if (args.contains("--stats"))
{
- if (args.endsWith("--stats"))
- {
- displayStats = true;
- //remove "--stats" from args list
- args.pop_back();
- }
- else if (args[args.size() - 2] == "--stats")
+ displayStats = true;
+ const int statsIndex = args.indexOf("--stats");
+ if (statsIndex != -1 && statsIndex != (args.size() - 1) &&
+ args[statsIndex + 1].toLower().endsWith(".json"))
{
- displayStats = true;
- outputStatsFile = args[args.size() - 1];
- //remove "--stats" and stats output file name from args list
- args.pop_back();
- args.pop_back();
+ outputStatsFile = args[statsIndex + 1];
+ args.removeAll(outputStatsFile);
}
+ args.removeAll("--stats");
}
LOG_VARD(displayStats);
LOG_VARD(outputStatsFile);
@@ -154,15 +152,63 @@ int ConflateCmd::runSimple(QStringList& args)
}
}
LOG_VARD(isDiffConflate);
+ LOG_VARD(diffConflator.conflatingTags());
// Check for separate output files (for geometry & tags)
bool separateOutput = false;
if (args.contains("--separate-output"))
{
+ const QString errorMsg =
+ QString("--separate-output is only valid when combiend with the --differential and ") +
+ QString("--include-tags options.");
+ if (!isDiffConflate || !diffConflator.conflatingTags())
+ {
+ throw IllegalArgumentException(errorMsg);
+ }
separateOutput = true;
args.removeAt(args.indexOf("--separate-output"));
}
+ LOG_VARD(separateOutput);
+ bool displayChangesetStats = false;
+ QString outputChangesetStatsFile;
+ if (args.contains("--changeset-stats"))
+ {
+ if (!isDiffConflate)
+ {
+ throw IllegalArgumentException(
+ "--changeset-stats is only valid when combined with the --differential option.");
+ }
+ else
+ {
+ displayChangesetStats = true;
+ const int statsIndex = args.indexOf("--changeset-stats");
+ LOG_VARD(statsIndex);
+ // If the input immediately after the changeset stats arg isn't a valid changeset stats file
+ // output format, we'll just silently skip it and assume we're outputting stats to the display
+ // only. This mimics how the map stats args and stats args in other commands are parsed. We
+ // may want to eventually return an error or warning here instead.
+ if (statsIndex != -1 && statsIndex != (args.size() - 1) &&
+ !args[statsIndex + 1].startsWith("--"))
+ {
+ outputChangesetStatsFile = args[statsIndex + 1];
+ QFileInfo changesetStatsInfo(outputChangesetStatsFile);
+ if (!ChangesetStatsFormat::isValidFileOutputFormat(changesetStatsInfo.completeSuffix()))
+ {
+ outputChangesetStatsFile = "";
+ }
+ else
+ {
+ args.removeAll(outputChangesetStatsFile);
+ }
+ }
+ args.removeAll("--changeset-stats");
+ }
+ }
+ LOG_VARD(displayChangesetStats);
+ LOG_VARD(outputChangesetStatsFile);
+
+ LOG_VARD(args.size() );
if (args.size() < 3 || args.size() > 4)
{
cout << getHelp() << endl << endl;
@@ -177,12 +223,11 @@ int ConflateCmd::runSimple(QStringList& args)
const QString input2 = args[1];
QString output = args[2];
- QFileInfo outputInfo(output);
- LOG_VARD(outputInfo.dir().absolutePath());
- const bool outputDirSuccess = QDir().mkpath(outputInfo.dir().absolutePath());
- if (!outputDirSuccess)
+ if (!IoUtils::isUrl(output))
{
- throw IllegalArgumentException("Unable to create output path for: " + output);
+ // Write the output dir now, so that a long job doesn't fail at the very end because the output
+ // dir can't be written.
+ IoUtils::writeOutputDir(output);
}
QString osmApiDbUrl;
@@ -194,13 +239,22 @@ int ConflateCmd::runSimple(QStringList& args)
throw IllegalArgumentException(
QString("%1 with SQL changeset output takes four parameters.").arg(getName()));
}
+ else if (displayChangesetStats)
+ {
+ throw IllegalArgumentException(
+ QString("Changeset statistics (--changeset-stats) may only be calculated with an XML ") +
+ QString("changeset output (.osc)."));
+ }
osmApiDbUrl = args[3];
}
else if (args.size() > 3)
{
std::cout << getHelp() << std::endl << std::endl;
throw IllegalArgumentException(
- QString("%1 with output: " + output + " takes three parameters.").arg(getName()));
+ QString("%1 with output: " + output + " takes three parameters. You provided %2: %3")
+ .arg(getName())
+ .arg(args.size())
+ .arg(args.join(",")));
}
Progress progress(ConfigOptions().getJobId(), JOB_SOURCE, Progress::JobState::Running);
@@ -243,7 +297,7 @@ int ConflateCmd::runSimple(QStringList& args)
{
// Let's see if we can remove any ops in the configuration that will have no effect on the
// feature types we're conflating in order to improve runtime performance.
- _removeSuperfluousOps();
+ SuperfluousConflateOpRemover::removeSuperfluousOps();
}
// The number of steps here must be updated as you add/remove job steps in the logic.
@@ -309,12 +363,13 @@ int ConflateCmd::runSimple(QStringList& args)
{
if (output.endsWith(".osc") || output.endsWith(".osc.sql"))
{
- OsmUtils::checkVersionLessThanOneCountAndLogWarning(map);
+ VersionUtils::checkVersionLessThanOneCountAndLogWarning(map);
}
// Store original IDs for tag diff
progress.set(
- _getJobPercentComplete(currentTask - 1), "Storing original features for tag differential...");
+ _getJobPercentComplete(currentTask - 1),
+ "Storing original features for tag differential...");
diffConflator.storeOriginalMap(map);
diffConflator.markInputElements(map);
currentTask++;
@@ -327,7 +382,9 @@ int ConflateCmd::runSimple(QStringList& args)
map, input2, ConfigOptions().getConflateUseDataSourceIds2(), Status::Unknown2);
currentTask++;
}
- LOG_STATUS("Conflating map with " << StringUtils::formatLargeNumber(map->size()) << " elements...");
+ MemoryUsageChecker::getInstance().check();
+ LOG_STATUS(
+ "Conflating map with " << StringUtils::formatLargeNumber(map->size()) << " elements...");
double inputBytes = IoSingleStat(IoSingleStat::RChar).value - bytesRead;
LOG_VART(inputBytes);
@@ -358,6 +415,7 @@ int ConflateCmd::runSimple(QStringList& args)
stats.append(SingleStat("Calculate Stats for Input 2 Time (sec)", t.getElapsedAndRestart()));
currentTask++;
}
+ MemoryUsageChecker::getInstance().check();
size_t initialElementCount = map->getElementCount();
stats.append(SingleStat("Initial Element Count", initialElementCount));
@@ -450,7 +508,23 @@ int ConflateCmd::runSimple(QStringList& args)
"Writing conflated output: ..." + output.right(maxFilePrintLength) + "...");
if (isDiffConflate && (output.endsWith(".osc") || output.endsWith(".osc.sql")))
{
- diffConflator.writeChangeset(result, output, separateOutput, osmApiDbUrl);
+ // Get the changeset stats output format from the changeset stats file extension, or if no
+ // changeset stats file was specified, assume text output to the display.
+ ChangesetStatsFormat statsFormat;
+ if (displayChangesetStats)
+ {
+ if (!outputChangesetStatsFile.isEmpty())
+ {
+ QFileInfo changesetStatsFileInfo(outputChangesetStatsFile);
+ statsFormat.setFormat(
+ ChangesetStatsFormat::fromString(changesetStatsFileInfo.completeSuffix()));
+ }
+ else
+ {
+ statsFormat.setFormat(ChangesetStatsFormat::Text);
+ }
+ }
+ diffConflator.writeChangeset(result, output, separateOutput, statsFormat, osmApiDbUrl);
}
else
{
@@ -536,6 +610,39 @@ int ConflateCmd::runSimple(QStringList& args)
}
}
+ if (displayChangesetStats)
+ {
+ if (outputChangesetStatsFile.isEmpty())
+ {
+ // output to display
+ LOG_STATUS("Changeset Geometry Stats:\n" << diffConflator.getGeometryChangesetStats());
+ if (diffConflator.conflatingTags())
+ {
+ LOG_STATUS("\nChangeset Tag Stats:\n" << diffConflator.getTagChangesetStats() << "\n");
+ }
+ }
+ else
+ {
+ // output to file
+ if (separateOutput)
+ {
+ // output separate files for geometry and tag change stats
+ FileUtils::writeFully(outputChangesetStatsFile, diffConflator.getGeometryChangesetStats());
+ if (diffConflator.conflatingTags())
+ {
+ QString tagsOutFile = outputChangesetStatsFile.replace(".json", "");
+ tagsOutFile.append(".tags.json");
+ FileUtils::writeFully(tagsOutFile, diffConflator.getTagChangesetStats());
+ }
+ }
+ else
+ {
+ // output a single stats file with both geometry and tags change stats
+ FileUtils::writeFully(outputChangesetStatsFile, diffConflator.getUnifiedChangesetStats());
+ }
+ }
+ }
+
progress.set(
1.0, Progress::JobState::Successful,
"Conflation job completed in " +
@@ -557,206 +664,6 @@ float ConflateCmd::_getJobPercentComplete(const int currentTaskNum) const
return (float)currentTaskNum / (float)_numTotalTasks;
}
-void ConflateCmd::_removeSuperfluousOps()
-{
- // get all crits involved in the current matcher configuration
- const QSet<QString> matcherCrits = _getMatchCreatorCrits();
-
- QSet<QString> removedOps;
-
- // for each of the conflate pre/post and map cleaner transforms (if conflate pre/post specifies
- // MapCleaner) filter out any that aren't associated with the same ElementCriterion as the ones
- // associated with the matchers
-
- const QStringList modifiedPreConflateOps =
- _filterOutUnneededOps(
- matcherCrits, ConfigOptions().getConflatePreOps(), removedOps);
- if (modifiedPreConflateOps.size() != ConfigOptions().getConflatePreOps().size())
- {
- conf().set(ConfigOptions::getConflatePreOpsKey(), modifiedPreConflateOps);
- }
-
- const QStringList modifiedPostConflateOps =
- _filterOutUnneededOps(
- matcherCrits, ConfigOptions().getConflatePostOps(), removedOps);
- if (modifiedPostConflateOps.size() != ConfigOptions().getConflatePostOps().size())
- {
- conf().set(ConfigOptions::getConflatePostOpsKey(), modifiedPostConflateOps);
- }
-
- const QString mapCleanerName = QString::fromStdString(MapCleaner::className());
- if (modifiedPreConflateOps.contains(mapCleanerName) ||
- modifiedPostConflateOps.contains(mapCleanerName))
- {
- const QStringList modifiedCleaningOps =
- _filterOutUnneededOps(
- matcherCrits, ConfigOptions().getMapCleanerTransforms(), removedOps);
- if (modifiedCleaningOps.size() != ConfigOptions().getMapCleanerTransforms().size())
- {
- conf().set(ConfigOptions::getMapCleanerTransformsKey(), modifiedCleaningOps);
- }
- }
-
- if (removedOps.size() > 0)
- {
- QStringList removedOpsList = removedOps.values();
- qSort(removedOpsList);
- LOG_INFO(
- "Removed the following conflate pre/post operations with no relevance to the selected " <<
- "matchers: " << removedOpsList.join(", "));
- }
-}
-
-QStringList ConflateCmd::_filterOutUnneededOps(
- const QSet<QString>& matcherCrits, const QStringList& ops, QSet<QString>& removedOps)
-{
- LOG_TRACE("ops before: " << ops);
-
- QStringList modifiedOps;
-
- for (int i = 0; i < ops.size(); i++)
- {
- const QString opName = ops.at(i);
- LOG_VART(opName);
-
- // MapCleaner's ops are configured with map.cleaner.transforms, so don't exclude it here.
- if (opName == QString::fromStdString(MapCleaner::className()))
- {
- modifiedOps.append(opName);
- continue;
- }
-
- // All the ops should be map ops or element vis and, thus, support FilteredByCriteria, but
- // we'll check anyway to be safe.
- std::shared_ptr<FilteredByCriteria> op;
- if (Factory::getInstance().hasBase<OsmMapOperation>(opName.toStdString()))
- {
- op =
- std::dynamic_pointer_cast<FilteredByCriteria>(
- std::shared_ptr<OsmMapOperation>(
- Factory::getInstance().constructObject<OsmMapOperation>(opName)));
- }
- else if (Factory::getInstance().hasBase<ElementVisitor>(opName.toStdString()))
- {
- op =
- std::dynamic_pointer_cast<FilteredByCriteria>(
- std::shared_ptr<ElementVisitor>(
- Factory::getInstance().constructObject<ElementVisitor>(opName)));
- }
-
- if (op)
- {
- // get all the class names of the crits that this op is associated with
- const QStringList opCrits = op->getCriteria();
- LOG_VART(opCrits);
-
- // If the op is not associated with any crit, we assume it should never be disabled based on
- // the feature type being conflated.
- if (opCrits.isEmpty())
- {
- modifiedOps.append(opName);
- continue;
- }
-
- // If any of the op's crits match with those in the matchers' list, we'll use it. Otherwise,
- // we disable it.
- bool opAdded = false;
- for (int j = 0; j < opCrits.size(); j++)
- {
- const QString opCrit = opCrits.at(j);
- if (matcherCrits.contains(opCrit))
- {
- modifiedOps.append(opName);
- opAdded = true;
- break;
- }
- }
- if (!opAdded)
- {
- removedOps.insert(opName);
- }
- }
- else
- {
- removedOps.insert(opName);
- }
- }
-
- LOG_TRACE("ops after: " << modifiedOps);
- LOG_VART(removedOps);
- return modifiedOps;
-}
-
-QSet<QString> ConflateCmd::_getMatchCreatorCrits()
-{
- QSet<QString> matcherCrits;
-
- // get all of the matchers from our current config
- std::vector<std::shared_ptr<MatchCreator>> matchCreators =
- MatchFactory::getInstance().getCreators();
- for (std::vector<std::shared_ptr<MatchCreator>>::const_iterator it = matchCreators.begin();
- it != matchCreators.end(); ++it)
- {
- std::shared_ptr<MatchCreator> matchCreator = *it;
- std::shared_ptr<FilteredByCriteria> critFilter =
- std::dynamic_pointer_cast<FilteredByCriteria>(matchCreator);
- const QStringList crits = critFilter->getCriteria();
-
- // Technically, not sure we'd have to error out here, but it will be good to know if any
- // matchers weren't configured with crits to keep conflate bugs from sneaking in over time.
- if (crits.size() == 0)
- {
- throw HootException(
- "Match creator: " + matchCreator->getName() +
- " does not specify any associated feature type criteria.");
- }
-
- for (int i = 0; i < crits.size(); i++)
- {
- const QString critStr = crits.at(i);
- LOG_VART(critStr);
- // doublecheck this is a valid crit
- if (Factory::getInstance().hasBase<ElementCriterion>(critStr.toStdString()))
- {
- // add the crit
- matcherCrits.insert(critStr);
-
- // also add any generic geometry crits the crit inherits from
-
- const QStringList pointCrits =
- GeometryTypeCriterion::getCriterionClassNamesByGeometryType(
- GeometryTypeCriterion::GeometryType::Point);
- LOG_VART(pointCrits);
- if (pointCrits.contains(critStr))
- {
- matcherCrits.insert(QString::fromStdString(PointCriterion::className()));
- }
-
- const QStringList lineCrits =
- GeometryTypeCriterion::getCriterionClassNamesByGeometryType(
- GeometryTypeCriterion::GeometryType::Line);
- LOG_VART(lineCrits);
- if (lineCrits.contains(critStr))
- {
- matcherCrits.insert(QString::fromStdString(LinearCriterion::className()));
- }
-
- const QStringList polyCrits =
- GeometryTypeCriterion::getCriterionClassNamesByGeometryType(
- GeometryTypeCriterion::GeometryType::Polygon);
- LOG_VART(polyCrits);
- if (polyCrits.contains(critStr))
- {
- matcherCrits.insert(QString::fromStdString(PolygonCriterion::className()));
- }
- }
- }
- }
-
- LOG_VART(matcherCrits);
- return matcherCrits;
-}
-
void ConflateCmd::_disableRoundaboutRemoval()
{
// This applies to both Attribute and Differential Conflation.