Skip to content

v0.2.48..v0.2.49 changeset WriteNameCountsCmd.cpp

Garret Voltz edited this page Oct 2, 2019 · 1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/cmd/WriteNameCountsCmd.cpp b/hoot-core/src/main/cpp/hoot/core/cmd/WriteNameCountsCmd.cpp
new file mode 100644
index 0000000..8a112b0
--- /dev/null
+++ b/hoot-core/src/main/cpp/hoot/core/cmd/WriteNameCountsCmd.cpp
@@ -0,0 +1,103 @@
+/*
+ * This file is part of Hootenanny.
+ *
+ * Hootenanny is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * --------------------------------------------------------------------
+ *
+ * The following copyright notices are generated automatically. If you
+ * have a new notice to add, please use the format:
+ * " * @copyright Copyright ..."
+ * This will properly maintain the copyright information. DigitalGlobe
+ * copyrights will be updated automatically.
+ *
+ * @copyright Copyright (C) 2018, 2019 DigitalGlobe (http://www.digitalglobe.com/)
+ */
+
+// Hoot
+#include <hoot/core/util/Factory.h>
+#include <hoot/core/cmd/BaseCommand.h>
+#include <hoot/core/io/WordCount.h>
+#include <hoot/core/io/WordCountWriter.h>
+#include <hoot/core/util/Log.h>
+#include <hoot/core/schema/TagDistribution.h>
+
+// Standard
+#include <map>
+
+namespace hoot
+{
+
+using namespace std;
+
+// Comparison predicate over WordCount: true when left's count is strictly greater than
+// right's count, false otherwise (including when the counts are equal).
+bool greaterThanWordCount(const WordCount& left, const WordCount& right)
+{ return left.count > right.count; }
+
+// Command that collects counts for the (tokenized) name tag keys from the inputs and writes them.
+class WriteNameCountsCmd : public BaseCommand
+{
+public:
+
+  static string className() { return "hoot::WriteNameCountsCmd"; }
+
+  WriteNameCountsCmd() {}
+
+  virtual QString getName() const override { return "write-name-counts"; }
+
+  virtual QString getDescription() const override
+  { return "Writes name tag counts to a database file"; }
+
+  // Expects args[0] = ';' separated input list and args[1] = output; prints help and throws
+  // HootException when fewer than two arguments are given. Returns 0 on completion.
+  virtual int runSimple(QStringList& args) override
+  {
+    if (args.size() < 2)
+    {
+      cout << getHelp() << endl << endl;
+      throw HootException(QString("%1 takes at least two parameters.").arg(getName()));
+    }
+
+    // adding the source datetime just makes things really slow.
+    conf().set(ConfigOptions().getReaderAddSourceDatetimeKey(), false);
+
+    const QStringList inputPaths = args[0].split(";");
+    const QString outputPath = args[1];
+
+    TagDistribution nameTagDist;
+    nameTagDist.setTagKeys(Tags::getNameKeys());
+    nameTagDist.setTokenize(true);
+    const std::map<QString, int> countsByName = nameTagDist.getTagCounts(inputPaths);
+
+    long totalCount = 0;
+    QVector<WordCount> wordCounts;
+    wordCounts.reserve(countsByName.size());
+    for (const auto& entry : countsByName)
+    {
+      wordCounts.push_back(WordCount(entry.first, entry.second));
+      totalCount += entry.second;
+    }
+
+    WordCountWriter writer(10 * 1000 * 1000);
+    writer.write(outputPath, wordCounts);
+
+    cout << "Total word count: " << totalCount << endl;
+    return 0;
+  }
+};
+
+HOOT_FACTORY_REGISTER(Command, WriteNameCountsCmd)
+
+}
Clone this wiki locally