Skip to content

v0.2.47..v0.2.48 changeset OsmJsonReader.cpp

Garret Voltz edited this page Sep 27, 2019 · 1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/io/OsmJsonReader.cpp b/hoot-core/src/main/cpp/hoot/core/io/OsmJsonReader.cpp
index e61e6d1..c5520e4 100644
--- a/hoot-core/src/main/cpp/hoot/core/io/OsmJsonReader.cpp
+++ b/hoot-core/src/main/cpp/hoot/core/io/OsmJsonReader.cpp
@@ -35,6 +35,7 @@
 #include <hoot/core/util/HootException.h>
 #include <hoot/core/util/Log.h>
 #include <hoot/core/util/StringUtils.h>
+#include <hoot/core/util/IoUtils.h>
 
 // Boost
 #include <boost/foreach.hpp>
@@ -61,6 +62,7 @@ int OsmJsonReader::logWarnCount = 0;
 
 HOOT_FACTORY_REGISTER(OsmMapReader, OsmJsonReader)
 
+// TODO: implement Configurable to help simplify this
 OsmJsonReader::OsmJsonReader()
   : _defaultStatus(Status::Invalid),
     _useDataSourceIds(true),
@@ -78,7 +80,12 @@ OsmJsonReader::OsmJsonReader()
     _bboxContinue(true),
     _runParallel(ConfigOptions().getJsonReaderHttpBboxParallel()),
     _coordGridSize(ConfigOptions().getJsonReaderHttpBboxMaxSize()),
-    _threadCount(ConfigOptions().getJsonReaderHttpBboxThreadCount())
+    _threadCount(ConfigOptions().getJsonReaderHttpBboxThreadCount()),
+    _bounds(GeometryUtils::envelopeFromConfigString(ConfigOptions().getConvertBoundingBox())),
+    _keepImmediatelyConnectedWaysOutsideBounds(
+      ConfigOptions().getConvertBoundingBoxKeepImmediatelyConnectedWaysOutsideBounds()),
+    _missingNodeCount(0),
+    _missingWayCount(0)
 {
 }
 
@@ -97,8 +104,7 @@ bool OsmJsonReader::isSupported(const QString& url)
   // Is it a file?
   if (isRelativeUrl || isLocalFile)
   {
-    const QString filename = isRelativeUrl ? myUrl.toString() : myUrl.toLocalFile();
-    if (QFile::exists(filename) && url.endsWith(".json", Qt::CaseInsensitive))
+    if (url.endsWith(".json", Qt::CaseInsensitive) && !url.startsWith("http", Qt::CaseInsensitive))
     {
       return true;
     }
@@ -121,6 +127,7 @@ void OsmJsonReader::open(const QString& url)
   {
     _isFile = false;
     _isWeb = false;
+    _path = url;
 
     // Bail out if unsupported
     if (!isSupported(url))
@@ -162,6 +169,11 @@ void OsmJsonReader::close()
 
 void OsmJsonReader::read(const OsmMapPtr& map)
 {
+  // clear the node, relation, and way id maps in case the reader is used for multiple files
+  _nodeIdMap.clear();
+  _relationIdMap.clear();
+  _wayIdMap.clear();
+
   _map = map;
   if (_isFile)
   {
@@ -177,9 +189,34 @@ void OsmJsonReader::read(const OsmMapPtr& map)
     _loadJSON(_results[i]);
     _parseOverpassJson();
   }
+  LOG_VARD(_map->getElementCount());
+
+  // See related note in OsmXmlReader::read.
+  if (!_bounds.isNull())
+  {
+    IoUtils::cropToBounds(_map, _bounds, _keepImmediatelyConnectedWaysOutsideBounds);
+    LOG_VARD(StringUtils::formatLargeNumber(_map->getElementCount()));
+  }
+}
+
+void OsmJsonReader::_readToMap()  // parses the currently loaded _propTree into a new _map
+{
+  // clear the node, relation, and way id maps in case the reader is used for multiple files
+  _nodeIdMap.clear();
+  _relationIdMap.clear();
+  _wayIdMap.clear();
+
+  _map.reset(new OsmMap());  // start from an empty map
+  _parseOverpassJson();  // adds the entries under the "elements" child of _propTree to _map
+  LOG_VARD(_map->getElementCount());
+
+  if (!_bounds.isNull())  // crop only when _bounds is non-null
+  {
+    IoUtils::cropToBounds(_map, _bounds, _keepImmediatelyConnectedWaysOutsideBounds);
+    LOG_VARD(StringUtils::formatLargeNumber(_map->getElementCount()));
+  }
 }
 
-// Throws HootException on error
 void OsmJsonReader::_loadJSON(const QString& jsonStr)
 {
   QString json(jsonStr);
@@ -223,19 +260,30 @@ void OsmJsonReader::_loadJSON(const QString& jsonStr)
   }
 }
 
+bool OsmJsonReader::isValidJson(const QString& jsonStr)  // true if _loadJSON accepts jsonStr
+{
+  try
+  {
+    _loadJSON(jsonStr);  // NOTE(review): presumably overwrites _propTree as a side effect - confirm
+  }
+  catch (const HootException&)  // only HootException is handled here; anything else propagates
+  {
+    return false;
+  }
+  return true;
+}
+
 OsmMapPtr OsmJsonReader::loadFromString(const QString& jsonStr)
 {
   _loadJSON(jsonStr);
-  _map.reset(new OsmMap());
-  _parseOverpassJson();
+  _readToMap();
   return _map;
 }
 
 OsmMapPtr OsmJsonReader::loadFromPtree(const boost::property_tree::ptree &tree)
 {
   _propTree = tree;
-  _map.reset(new OsmMap());
-  _parseOverpassJson();
+  _readToMap();
   return _map;
 }
 
@@ -250,16 +298,17 @@ OsmMapPtr OsmJsonReader::loadFromFile(const QString& path)
   QTextStream instream(&infile);
   QString jsonStr = instream.readAll();
   _loadJSON(jsonStr);
-  _map.reset(new OsmMap());
-  _parseOverpassJson();
+  _readToMap();
   return _map;
 }
 
 void OsmJsonReader::setConfiguration(const Settings& conf)
 {
-  _runParallel = ConfigOptions(conf).getJsonReaderHttpBboxParallel();
-  _coordGridSize = ConfigOptions(conf).getJsonReaderHttpBboxMaxSize();
-  _threadCount = ConfigOptions(conf).getJsonReaderHttpBboxThreadCount();
+  ConfigOptions opts(conf);
+  _runParallel = opts.getJsonReaderHttpBboxParallel();
+  _coordGridSize = opts.getJsonReaderHttpBboxMaxSize();
+  _threadCount = opts.getJsonReaderHttpBboxThreadCount();
+  setBounds(GeometryUtils::envelopeFromConfigString(opts.getConvertBoundingBox()));
 }
 
 void OsmJsonReader::_parseOverpassJson()
@@ -272,7 +321,8 @@ void OsmJsonReader::_parseOverpassJson()
 
   // Make a map, and iterate through all of our elements, adding them
   pt::ptree elements = _propTree.get_child("elements");
-  for (pt::ptree::const_iterator elementIt = elements.begin(); elementIt != elements.end(); ++elementIt)
+  for (pt::ptree::const_iterator elementIt = elements.begin(); elementIt != elements.end();
+       ++elementIt)
   {
     // Type can be node, way, or relation
     string typeStr = elementIt->second.get("type", string("--"));
@@ -304,20 +354,61 @@ void OsmJsonReader::_parseOverpassJson()
   }
 }
 
-void OsmJsonReader::_parseOverpassNode(const pt::ptree &item)
+void OsmJsonReader::_parseOverpassNode(const pt::ptree& item)
 {
+  const long debugId = -25928;
+
   // Get info we need to construct our node
-  long id = -1;
+  long id = item.get("id", id);
+
+  if (_nodeIdMap.contains(id))
+  {
+    throw HootException(
+      QString("Duplicate node id %1 in map %2 encountered.").arg(id).arg(_path));
+  }
+
+  long newId;
   if (_useDataSourceIds)
-    id = item.get("id", id);
+  {
+    newId = id;
+  }
+  else
+  {
+    newId = _map->createNextNodeId();
+  }
+  LOG_VART(id);
+  LOG_VART(newId);
+  _nodeIdMap.insert(id, newId);
+
+  const QString msg = "Reading " + ElementId(ElementType::Node, newId).toString() + "...";
+  if (newId == debugId)
+  {
+    LOG_VARD(msg);
+  }
   else
-    id = _map->createNextNodeId();
+  {
+    LOG_VART(msg);
+  }
 
   double lat = item.get("lat", 0.0);
   double lon = item.get("lon", 0.0);
 
+  long version = ElementData::VERSION_EMPTY;
+  version = item.get("version", version);
+  long changeset = ElementData::CHANGESET_EMPTY;
+  changeset = item.get("changeset", changeset);
+  unsigned int timestamp = ElementData::TIMESTAMP_EMPTY;
+  timestamp = item.get("timestamp", timestamp);
+  std::string user = ElementData::USER_EMPTY.toStdString();
+  user = item.get("user", user);
+  long uid = ElementData::UID_EMPTY;
+  uid = item.get("uid", uid);
+
   // Construct node
-  NodePtr pNode(new Node(_defaultStatus, id, lon, lat, _defaultCircErr));
+  NodePtr pNode(
+    new Node(
+      _defaultStatus, newId, lon, lat, _defaultCircErr, changeset, version, timestamp,
+      QString::fromStdString(user), uid));
 
   // Add tags
   _addTags(item, pNode);
@@ -325,6 +416,8 @@ void OsmJsonReader::_parseOverpassNode(const pt::ptree &item)
   // Add node to map
   _map->addNode(pNode);
 
+  LOG_TRACE("Loaded node: " << pNode);
+
   _numRead++;
   if (_numRead % _statusUpdateInterval == 0)
   {
@@ -332,17 +425,56 @@ void OsmJsonReader::_parseOverpassNode(const pt::ptree &item)
   }
 }
 
-void OsmJsonReader::_parseOverpassWay(const pt::ptree &item)
+void OsmJsonReader::_parseOverpassWay(const pt::ptree& item)
 {
+  const long debugId = -3047;
+
   // Get info we need to construct our way
-  long id = -1;
+  long id = item.get("id", id);
+
+  if (_wayIdMap.contains(id))
+  {
+    throw HootException(
+      QString("Duplicate way id %1 in map %2 encountered.").arg(id).arg(_path));
+  }
+
+  long newId;
   if (_useDataSourceIds)
-    id = item.get("id", id);
+  {
+    newId = id;
+  }
   else
-    id = _map->createNextWayId();
+  {
+    newId = _map->createNextWayId();
+  }
+  _wayIdMap.insert(id, newId);
+
+  const QString msg = "Reading " + ElementId(ElementType::Way, newId).toString() + "...";
+  if (newId == debugId)
+  {
+    LOG_VARD(msg);
+  }
+  else
+  {
+    LOG_VART(msg);
+  }
+
+  long version = ElementData::VERSION_EMPTY;
+  version = item.get("version", version);
+  long changeset = ElementData::CHANGESET_EMPTY;
+  changeset = item.get("changeset", changeset);
+  unsigned int timestamp = ElementData::TIMESTAMP_EMPTY;
+  timestamp = item.get("timestamp", timestamp);
+  std::string user = ElementData::USER_EMPTY.toStdString();
+  user = item.get("user", user);
+  long uid = ElementData::UID_EMPTY;
+  uid = item.get("uid", uid);
 
   // Construct Way
-  WayPtr pWay(new Way(_defaultStatus, id, _defaultCircErr));
+  WayPtr pWay(
+    new Way(
+      _defaultStatus, newId, _defaultCircErr, changeset, version, timestamp,
+      QString::fromStdString(user), uid));
 
   // Add nodes
   if (item.not_found() != item.find("nodes"))
@@ -352,7 +484,32 @@ void OsmJsonReader::_parseOverpassWay(const pt::ptree &item)
     while (nodeIt != nodes.end())
     {
       long v = nodeIt->second.get_value<long>();
-      pWay->addNode(v);
+      LOG_VART(v);
+
+      const bool nodePresent = _nodeIdMap.contains(v);
+      LOG_VART(nodePresent);
+      if (!nodePresent)
+      {
+        _missingNodeCount++;
+        if (logWarnCount < Log::getWarnMessageLimit())
+        {
+          LOG_WARN(
+            "Missing " << ElementId(ElementType::Node, v) << " in " <<
+            ElementId(ElementType::Way, newId) << ".");
+        }
+        else if (logWarnCount == Log::getWarnMessageLimit())
+        {
+          LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
+        }
+        logWarnCount++;
+      }
+      else
+      {
+        long newRef = _nodeIdMap.value(v);
+        LOG_TRACE("Adding way node: " << newRef << "...");
+        pWay->addNode(newRef);
+      }
+
       ++nodeIt;
     }
   }
@@ -363,6 +520,8 @@ void OsmJsonReader::_parseOverpassWay(const pt::ptree &item)
   // Add way to map
   _map->addWay(pWay);
 
+  LOG_TRACE("Loaded way: " << pWay);
+
   _numRead++;
   if (_numRead % _statusUpdateInterval == 0)
   {
@@ -370,17 +529,57 @@ void OsmJsonReader::_parseOverpassWay(const pt::ptree &item)
   }
 }
 
-void OsmJsonReader::_parseOverpassRelation(const pt::ptree &item)
+void OsmJsonReader::_parseOverpassRelation(const pt::ptree& item)
 {
+  const long debugId = 0;
+
   // Get info we need to construct our relation
-  long id = -1;
+  long id = item.get("id", id);
+
+  // See related note in OsmXmlReader::_createRelation.
+//  if (_relationIdMap.contains(id))
+//  {
+//    throw HootException(
+//      QString("Duplicate relation id %1 in map %2 encountered.").arg(id).arg(_path));
+//  }
+
+  long newId;
   if (_useDataSourceIds)
-    id = item.get("id", id);
+  {
+    newId = id;
+  }
   else
-    id = _map->createNextRelationId();
+  {
+    newId = _map->createNextRelationId();
+  }
+  _relationIdMap.insert(id, newId);
+
+  const QString msg = "Reading " + ElementId(ElementType::Relation, newId).toString() + "...";
+  if (newId == debugId)
+  {
+    LOG_VARD(msg);
+  }
+  else
+  {
+    LOG_VART(msg);
+  }
+
+  long version = ElementData::VERSION_EMPTY;
+  version = item.get("version", version);
+  long changeset = ElementData::CHANGESET_EMPTY;
+  changeset = item.get("changeset", changeset);
+  unsigned int timestamp = ElementData::TIMESTAMP_EMPTY;
+  timestamp = item.get("timestamp", timestamp);
+  std::string user = ElementData::USER_EMPTY.toStdString();
+  user = item.get("user", user);
+  long uid = ElementData::UID_EMPTY;
+  uid = item.get("uid", uid);
 
   // Construct Relation
-  RelationPtr pRelation(new Relation(_defaultStatus, id, _defaultCircErr));
+  RelationPtr pRelation(
+    new Relation(
+      _defaultStatus, newId, _defaultCircErr, "", changeset, version, timestamp,
+      QString::fromStdString(user), uid));
 
   // Add members
   if (item.not_found() != item.find("members"))
@@ -393,9 +592,80 @@ void OsmJsonReader::_parseOverpassRelation(const pt::ptree &item)
       long ref = memberIt->second.get("ref", -1l); // default -1 ?
       string role = memberIt->second.get("role", string(""));
 
-      pRelation->addElement(QString::fromStdString(role),
-                            ElementType::fromString(QString::fromStdString(typeStr)),
-                            ref);
+      bool okToAdd = false;
+      if (typeStr == "node")
+      {
+        const bool memberPresent = _nodeIdMap.contains(ref);
+        if (!memberPresent)
+        {
+          _missingNodeCount++;
+          if (logWarnCount < Log::getWarnMessageLimit())
+          {
+            LOG_WARN(
+              "Missing " << ElementId(ElementType::Node, ref) << " in " <<
+              ElementId(ElementType::Relation, newId) << ".");
+          }
+          else if (logWarnCount == Log::getWarnMessageLimit())
+          {
+            LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
+          }
+          logWarnCount++;
+        }
+        else
+        {
+          okToAdd = true;
+        }
+      }
+      else if (typeStr == "way")
+      {
+        const bool memberPresent = _wayIdMap.contains(ref);
+        if (!memberPresent)
+        {
+          _missingWayCount++;
+          if (logWarnCount < Log::getWarnMessageLimit())
+          {
+            LOG_WARN(
+              "Missing " << ElementId(ElementType::Way, ref) << " in " <<
+              ElementId(ElementType::Relation, newId) << ".");
+          }
+          else if (logWarnCount == Log::getWarnMessageLimit())
+          {
+            LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
+          }
+          logWarnCount++;
+        }
+        else
+        {
+          okToAdd = true;
+        }
+      }
+      else if (typeStr == "relation")
+      {
+        ref = _getRelationId(ref);
+        okToAdd = true;
+      }
+      else
+      {
+        if (logWarnCount < Log::getWarnMessageLimit())
+        {
+          LOG_WARN("Found a relation member with unexpected type: " << typeStr << " in relation ("
+                     << id << ")");
+        }
+        else if (logWarnCount == Log::getWarnMessageLimit())
+        {
+          LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
+        }
+        logWarnCount++;
+        okToAdd = false;
+      }
+
+      if (okToAdd)
+      {
+        LOG_TRACE("Adding relation relation member: " << ref << "...");
+        pRelation->addElement(QString::fromStdString(role),
+                              ElementType::fromString(QString::fromStdString(typeStr)), ref);
+      }
+
       ++memberIt;
     }
   }
@@ -406,6 +676,8 @@ void OsmJsonReader::_parseOverpassRelation(const pt::ptree &item)
   // Add relation to map
   _map->addRelation(pRelation);
 
+  LOG_TRACE("Loaded relation: " << pRelation);
+
   _numRead++;
   if (_numRead % _statusUpdateInterval == 0)
   {
@@ -413,7 +685,30 @@ void OsmJsonReader::_parseOverpassRelation(const pt::ptree &item)
   }
 }
 
-void OsmJsonReader::_addTags(const boost::property_tree::ptree &item, hoot::ElementPtr pElement)
+long OsmJsonReader::_getRelationId(long fileId)  // returns the map id to use for input id fileId
+{
+  long newId;
+  if (_useDataSourceIds)
+  {
+    newId = fileId;  // source ids are kept unchanged
+    _relationIdMap.insert(fileId, newId);  // record the identity mapping
+  }
+  else
+  {
+    if (_relationIdMap.find(fileId) == _relationIdMap.end())  // no id recorded yet for fileId
+    {
+      newId = _map->createNextRelationId();  // obtain a new id from the map
+      _relationIdMap.insert(fileId, newId);
+    }
+    else
+    {
+      newId = _relationIdMap[fileId];  // reuse the id recorded earlier
+    }
+  }
+  return newId;
+}
+
+void OsmJsonReader::_addTags(const boost::property_tree::ptree& item, hoot::ElementPtr pElement)
 {
   // Find tags and add them
   if (item.not_found() != item.find("tags"))
Clone this wiki locally