Skip to content

v0.2.53..v0.2.54 changeset OsmSchema.cpp

Garret Voltz edited this page Mar 31, 2020 · 1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/schema/OsmSchema.cpp b/hoot-core/src/main/cpp/hoot/core/schema/OsmSchema.cpp
index ebefb14..09544ca 100644
--- a/hoot-core/src/main/cpp/hoot/core/schema/OsmSchema.cpp
+++ b/hoot-core/src/main/cpp/hoot/core/schema/OsmSchema.cpp
@@ -53,6 +53,7 @@
 #include <hoot/core/schema/OsmSchemaLoader.h>
 #include <hoot/core/util/ConfigOptions.h>
 #include <hoot/core/util/FileUtils.h>
+#include <hoot/core/util/StringUtils.h>
 #include <hoot/core/conflate/address/AddressParser.h>
 
 // Qt
@@ -481,32 +482,6 @@ public:
     return result;
   }
 
-  QSet<QString> getAllTypeKeys()
-  {
-    QSet<QString> typeTagKeys;
-
-    QSet<QString> allTagKeysTemp = OsmSchema::getInstance().getAllTagKeys();
-    // not completely sure what should be in this list; would be nice to have access to isMetadata
-    // here
-    allTagKeysTemp.remove(MetadataTags::Ref1());
-    allTagKeysTemp.remove(MetadataTags::Ref2());
-    allTagKeysTemp.remove("uuid");
-    allTagKeysTemp.remove("name");
-    allTagKeysTemp.remove("ele");
-    for (QSet<QString>::const_iterator it = allTagKeysTemp.begin(); it != allTagKeysTemp.end();
-         ++it)
-    {
-      const QString tagKey = *it;
-      //address tags aren't really type tags
-      if (!tagKey.startsWith("addr:"))
-      {
-        typeTagKeys.insert(tagKey);
-      }
-    }
-
-    return typeTagKeys;
-  }
-
   vector<SchemaVertex> getAssociatedTags(QString name)
   {
     set<VertexId> vids;
@@ -1411,6 +1386,8 @@ private:
 
 std::shared_ptr<OsmSchema> OsmSchema::_theInstance = NULL;
 
+QStringList _genericKvps;
+
 OsmSchema::OsmSchema()
 {
   d = new OsmSchemaData();
@@ -1470,8 +1447,20 @@ QSet<QString> OsmSchema::getAllTagKeys()
 QSet<QString> OsmSchema::getAllTypeKeys()
 {
   if (_allTypeKeysCache.isEmpty())
-  {
-    _allTypeKeysCache = d->getAllTypeKeys();
+  {  
+    QSet<QString> allTypeKeysCacheTemp = d->getAllTagKeys();
+    for (QSet<QString>::const_iterator typeKeyItr = allTypeKeysCacheTemp.constBegin();
+         typeKeyItr != allTypeKeysCacheTemp.constEnd(); ++typeKeyItr)
+    {
+      const QString typeKey = *typeKeyItr;
+      // All we care about for type comparison are tags of schema type "tag". We definitely don't
+      // care about metadata tags, but its possible we may care about some text or numeric tags
+      // at some point.
+      if (!isMetaData(typeKey, "") && !isTextTag(typeKey) && !isNumericTag(typeKey))
+      {
+        _allTypeKeysCache.insert(typeKey);
+      }
+    }
     //LOG_VART(_allTypeKeysCache);
   }
   return _allTypeKeysCache;
@@ -1782,6 +1771,17 @@ bool OsmSchema::isMetaData(const QString& key, const QString& /*value*/)
   }
 }
 
+bool OsmSchema::isTextTag(const QString& key)
+{
+  return getTagVertex(key).valueType == Text;
+}
+
+bool OsmSchema::isNumericTag(const QString& key)
+{
+  TagValueType valueType = getTagVertex(key).valueType;
+  return valueType == Real || valueType == Int;
+}
+
 void OsmSchema::loadDefault()
 {
   QString path = ConfPath::search("schema.json");
@@ -1814,7 +1814,6 @@ double OsmSchema::score(const QString& kvp, const Tags& tags)
     const QString value = tagItr.value().trimmed();
     if (!key.isEmpty() && !value.isEmpty())
     {
-      //QString kvp2 = tagItr.key() + "=" + tagItr.value();
       QString kvp2 = "";
       kvp2.append(key);
       kvp2.append("=");
@@ -1825,50 +1824,189 @@ double OsmSchema::score(const QString& kvp, const Tags& tags)
         maxScore = scoreVal;
       }
     }
-
   }
   return maxScore;
 }
 
-double OsmSchema::scoreTypes(const Tags& tags1, const Tags& tags2)
+bool OsmSchema::isGeneric(const Tags& tags)
 {
+  return
+    !hasMoreThanOneType(tags) &&
+    StringUtils::containsAny(tags.toKvps(), getGenericKvps().toList());
+}
+
+bool OsmSchema::isGenericKvp(const QString& kvp)
+{
+  return getGenericKvps().contains(kvp);
+}
+
+QSet<QString> OsmSchema::getGenericKvps() const
+{
+  // There may be a better way to manage these in the schema itself.
+  if (_genericKvps.isEmpty())
+  {
+    _genericKvps.insert("poi=yes");
+    _genericKvps.insert("building=yes");
+    _genericKvps.insert("area=yes");
+    //_genericKvps.insert("type=route");
+  }
+  return _genericKvps;
+}
+
+QString OsmSchema::getFirstType(const Tags& tags, const bool allowGeneric)
+{
+  QStringList keys = tags.keys();
+  keys.sort();
+  for (int i = 0; i < keys.size(); i++)
+  {
+    const QString key = keys.at(i);
+    const QString val = tags[key];
+    const QString kvp = toKvp(key, val);
+    if (isTypeKey(key) && (allowGeneric || !isGenericKvp(kvp)))
+    {
+      return kvp;
+    }
+  }
+  return "";
+}
+
+bool OsmSchema::explicitTypeMismatch(const Tags& tags1, const Tags& tags2,
+                                     const double minTypeScore)
+{
+  // TODO: We may need to take category into account here as well.
+
+  LOG_VARD(tags1);
+  LOG_VARD(tags2);
+
+  bool featuresHaveExplicitTypeMismatch = false;
+
+  const bool feature1HasType = hasType(tags1);
+  if (feature1HasType)
+  {
+    const bool feature2HasType = hasType(tags2);
+    if (feature2HasType)
+    {
+      const bool feature1Generic = isGeneric(tags1);
+      if (!feature1Generic)
+      {
+        const bool feature2Generic = isGeneric(tags2);
+        if (!feature2Generic)
+        {
+          const double typeScore = scoreTypes(tags1, tags2, true);
+          if (typeScore < minTypeScore)
+          {
+            featuresHaveExplicitTypeMismatch = true;
+            LOG_DEBUG(
+              "explicit type mismatch: " << getFirstType(tags1, false) << " and " <<
+              getFirstType(tags2, false));
+          }
+          else
+          {
+            LOG_DEBUG(
+              "explicit type match: " << getFirstType(tags1, false) << " and " <<
+              getFirstType(tags2, false));
+          }
+        }
+      }
+    }
+  }
+
+  LOG_VARD(featuresHaveExplicitTypeMismatch);
+  return featuresHaveExplicitTypeMismatch;
+}
+
+bool OsmSchema::hasType(const Tags& tags)
+{
+  for (Tags::const_iterator tagsItr = tags.begin(); tagsItr != tags.end(); ++tagsItr)
+  {
+    LOG_VART(tagsItr.key());
+    LOG_VART(isTypeKey(tagsItr.key()));
+    if (isTypeKey(tagsItr.key()))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool OsmSchema::hasMoreThanOneType(const Tags& tags)
+{
+  int count = 0;
+  for (Tags::const_iterator tagsItr = tags.begin(); tagsItr != tags.end(); ++tagsItr)
+  {
+    LOG_VART(tagsItr.key());
+    LOG_VART(isTypeKey(tagsItr.key()));
+    if (isTypeKey(tagsItr.key()))
+    {
+      count++;
+      if (count > 1)
+      {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+double OsmSchema::scoreTypes(const Tags& tags1, const Tags& tags2, const bool ignoreGenericTypes)
+{  
   double maxScore = 0.0;
+
   for (Tags::const_iterator tags1Itr = tags1.begin(); tags1Itr != tags1.end(); ++tags1Itr)
   {
     const QString key1 = tags1Itr.key().trimmed();
     const QString val1 = tags1Itr.value().trimmed();
-    const QString kvp1 = toKvp(key1, val1);
-    LOG_VART(key1);
-    LOG_VART(val1);
-    LOG_VART(kvp1);
-    LOG_VART(isMetaData(key1, val1));
-    LOG_VART(isTypeKey(key1));
-    if (!key1.isEmpty() && !val1.isEmpty() && !isMetaData(key1, val1) &&
-        (isTypeKey(key1) || isTypeKey(kvp1)))
-    {
-      for (Tags::const_iterator tags2Itr = tags2.begin(); tags2Itr != tags2.end(); ++tags2Itr)
+    if (!key1.isEmpty() && !val1.isEmpty())
+    {
+      const QString kvp1 = toKvp(key1, val1);
+
+      LOG_VART(key1);
+      LOG_VART(val1);
+      LOG_VART(kvp1);
+      LOG_VART(isMetaData(key1, val1));
+      LOG_VART(isTypeKey(key1));
+
+      if (ignoreGenericTypes && getGenericKvps().contains(kvp1))
       {
-        const QString key2 = tags2Itr.key().trimmed();
-        const QString val2 = tags2Itr.value().trimmed();
-        const QString kvp2 = toKvp(key2, val2);
-        LOG_VART(key2);
-        LOG_VART(val2);
-        LOG_VART(kvp2);
-        LOG_VART(isMetaData(key2, val2));
-        LOG_VART(isTypeKey(key2));
-        if (!key2.isEmpty() && !val2.isEmpty() && !isMetaData(key2, val2) &&
-            (isTypeKey(key2) || isTypeKey(kvp2)))
+        continue;
+      }
+
+      if (!isMetaData(key1, val1) && (isTypeKey(key1) || isTypeKey(kvp1)))
+      {
+        for (Tags::const_iterator tags2Itr = tags2.begin(); tags2Itr != tags2.end(); ++tags2Itr)
         {
-          const double score = OsmSchema::getInstance().score(kvp1, kvp2);
-          LOG_VART(score);
-          if (score > maxScore)
+          const QString key2 = tags2Itr.key().trimmed();
+          const QString val2 = tags2Itr.value().trimmed();
+          if (!key2.isEmpty() && !val2.isEmpty())
           {
-            maxScore = score;
+            const QString kvp2 = toKvp(key2, val2);
+
+            LOG_VART(key2);
+            LOG_VART(val2);
+            LOG_VART(kvp2);
+            LOG_VART(isMetaData(key2, val2));
+            LOG_VART(isTypeKey(key2));
+
+            if (ignoreGenericTypes && getGenericKvps().contains(kvp2))
+            {
+              continue;
+            }
+
+            if (!isMetaData(key2, val2) && (isTypeKey(key2) || isTypeKey(kvp2)))
+            {
+              const double calculatedScore = score(kvp1, kvp2);
+              LOG_VART(calculatedScore);
+              if (calculatedScore > maxScore)
+              {
+                maxScore = calculatedScore;
+              }
+            }
           }
         }
       }
     }
   }
+
   LOG_VART(maxScore);
   return maxScore;
 }
Clone this wiki locally