Skip to content

Commit

Permalink
fix(onto validation): correctly identify circular dependencies (DEV-769
Browse files Browse the repository at this point in the history
…) (#192)
  • Loading branch information
gNahcab committed May 25, 2022
1 parent 18d110c commit ed35902
Show file tree
Hide file tree
Showing 8 changed files with 350 additions and 200 deletions.
1 change: 1 addition & 0 deletions Pipfile
Expand Up @@ -16,6 +16,7 @@ rfc3987 = "*"
pystrict = "*"
openpyxl = "*"
pyparsing = "==2.4.7"
networkx = "*"

[dev-packages]
mkdocs = "*"
Expand Down
206 changes: 107 additions & 99 deletions Pipfile.lock

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions dev-requirements.txt
Expand Up @@ -13,55 +13,55 @@ cerberus==1.3.4
certifi==2021.10.8
chardet==4.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
charset-normalizer==2.0.12; python_version >= '3'
click==8.1.2
click==8.1.3
colorama==0.4.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
distlib==0.3.4
ghp-import==2.0.2
ghp-import==2.1.0
idna==3.3; python_version >= '3'
importlib-metadata==4.11.3; python_version >= '3.7'
iniconfig==1.1.1
jinja2==3.1.1; python_version >= '3.7'
markdown==3.3.6; python_version >= '3.6'
jinja2==3.1.2; python_version >= '3.7'
markdown==3.3.7; python_version >= '3.6'
markupsafe==2.1.1; python_version >= '3.7'
mergedeep==1.3.4; python_version >= '3.6'
mkdocs-include-markdown-plugin==3.3.0
mkdocs-include-markdown-plugin==3.4.0
mkdocs-material-extensions==1.0.3; python_version >= '3.6'
mkdocs-material==8.2.11
mkdocs-material==8.2.15
mkdocs==1.3.0
mypy-extensions==0.4.3
mypy==0.942
numpy==1.22.3; platform_machine != 'aarch64' and platform_machine != 'arm64' and python_version < '3.10'
mypy==0.950
numpy==1.22.3; python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'
orderedmultidict==1.0.1
packaging==20.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pandas==1.4.2
pep517==0.12.0
pip-shims==0.7.0; python_version >= '3.6'
pip==22.0.4; python_version >= '3.7'
pip==22.1; python_version >= '3.7'
pipenv-setup==3.2.0
pipfile==0.0.2
platformdirs==2.5.2; python_version >= '3.7'
plette[validation]==0.2.3; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
pluggy==1.0.0; python_version >= '3.6'
py==1.11.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pycodestyle==2.8.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pygments==2.11.2; python_version >= '3.5'
pymdown-extensions==9.3; python_version >= '3.7'
pygments==2.12.0; python_version >= '3.6'
pymdown-extensions==9.4; python_version >= '3.7'
pyparsing==2.4.7
pytest==7.1.2
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pytz==2022.1
pyyaml-env-tag==0.1; python_version >= '3.6'
pyyaml==6.0; python_version >= '3.6'
requests==2.27.1
requirementslib==1.6.4; python_version >= '3.7'
setuptools==62.1.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'
setuptools==62.2.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
tomli==2.0.1; python_version >= '3.7'
tomlkit==0.10.2; python_version >= '3.6' and python_version < '4'
tomlkit==0.10.2; python_version >= '3.6' and python_version < '4.0'
typing-extensions==4.2.0; python_version >= '3.7'
urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
vistir==0.5.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
watchdog==2.1.7; python_version >= '3.6'
watchdog==2.1.8; python_version >= '3.6'
wheel==0.37.1
zipp==3.8.0; python_version >= '3.7'
158 changes: 83 additions & 75 deletions knora/dsplib/utils/onto_validate.py
@@ -1,9 +1,10 @@
import json
import os
import re
from typing import Any, Union, List, Set
from typing import Any, Union
import jsonschema
import json
import jsonpath_ng, jsonpath_ng.ext
import networkx as nx
from ..utils.expand_all_lists import expand_lists_from_excel


Expand Down Expand Up @@ -56,33 +57,65 @@ def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLik

def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool:
    """
    Check whether a data model contains properties derived from hasLinkTo that form a circular reference.

    If so, these properties must have the cardinality 0-1 or 0-n, because during the xmlupload process,
    these values are temporarily removed. All offending (resource, property) pairs are printed to stdout.

    Args:
        data_model: dictionary with a DSP project (as defined in a JSON ontology file)

    Returns:
        True if no circle was detected, or if all elements of all circles are of cardinality "0-1" or "0-n".
        False if there is a circle with at least one element that has a cardinality of "1" or "1-n".
    """
    # map every property derived from hasLinkTo to the resource classes it points to
    # example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D':
    # link_properties = {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}
    link_properties = collect_link_properties(data_model)

    # (resource, property) pairs that are part of a circle but are not optional
    errors = identify_problematic_cardinalities(data_model, link_properties)

    if not errors:
        return True

    print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
          'between resources. This is not a problem in itself, but if you try to upload data that actually '
          'contains circular references, these "hasLinkTo" properties will be temporarily removed from the '
          'affected resources. Therefore, it is necessary that all involved "hasLinkTo" properties have a '
          'cardinality of 0-1 or 0-n. \n'
          'Please make sure that the following properties have a cardinality of 0-1 or 0-n:')
    for resource, prop in errors:
        print(f'\t- Resource {resource}, property {prop}')
    return False


def collect_link_properties(data_model: dict[Any, Any]) -> dict[str, list[str]]:
"""
map the properties derived from hasLinkTo to the resource classes they point to, for example:
link_properties = {'rosetta:hasImage2D': ['rosetta:Image2D'], ...}
"""
ontos = data_model['project']['ontologies']
link_properties: dict[str, List[str]] = dict()
hasLinkTo_props = {'hasLinkTo', 'isPartOf', 'isRegionOf', 'isAnnotationOf'}
link_properties: dict[str, list[str]] = dict()
for index, onto in enumerate(ontos):
hasLinkTo_matches = jsonpath_ng.ext.parse(
f'$.project.ontologies[{index}].properties[?@.super[*] == hasLinkTo]'
).find(data_model)
prop_obj_pair: dict[str, List[str]] = dict()
hasLinkTo_matches = list()
# look for child-properties down to 5 inheritance levels that are derived from hasLinkTo-properties
for i in range(5):
for hasLinkTo_prop in hasLinkTo_props:
hasLinkTo_matches.extend(jsonpath_ng.ext.parse(
f'$.project.ontologies[{index}].properties[?super[*] == {hasLinkTo_prop}]'
).find(data_model))
# make the children from this iteration to the parents of the next iteration
hasLinkTo_props = {x.value['name'] for x in hasLinkTo_matches}
prop_obj_pair: dict[str, list[str]] = dict()
for match in hasLinkTo_matches:
prop = onto['name'] + ':' + match.value['name']
target = match.value['object']
if target != 'Resource':
# make the target a fully qualified name (with the ontology's name prefixed)
target = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', target)
target = re.sub(r'^:([^:]+)$', f'{onto["name"]}:\\1', target)
prop_obj_pair[prop] = [target]
link_properties.update(prop_obj_pair)

# in case the object of a property is "Resource", the link can point to any resource class
all_res_names: List[str] = list()
all_res_names: list[str] = list()
for index, onto in enumerate(ontos):
matches = jsonpath_ng.ext.parse(f'$.resources[*].name').find(onto)
tmp = [f'{onto["name"]}:{match.value}' for match in matches]
Expand All @@ -91,11 +124,19 @@ def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bo
if 'Resource' in targ:
link_properties[prop] = all_res_names

# make a dict that maps resource classes to their hasLinkTo-properties, and to the classes they point to
# example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D':
# dependencies = {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}}
dependencies: dict[str, dict[str, List[str]]] = dict()
for onto in ontos:
return link_properties


def identify_problematic_cardinalities(data_model: dict[Any, Any], link_properties: dict[str, list[str]]) -> list[tuple[str, str]]:
"""
make an error list with all cardinalities that are part of a circle but have a cardinality of "1" or "1-n"
"""
# make 2 dicts of the following form:
# dependencies = {'rosetta:Text': {'rosetta:hasImage2D': ['rosetta:Image2D'], ...}}
# cardinalities = {'rosetta:Text': {'rosetta:hasImage2D': '0-1', ...}}
dependencies: dict[str, dict[str, list[str]]] = dict()
cardinalities: dict[str, dict[str, str]] = dict()
for onto in data_model['project']['ontologies']:
for resource in onto['resources']:
resname: str = onto['name'] + ':' + resource['name']
for card in resource['cardinalities']:
Expand All @@ -111,64 +152,31 @@ def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bo
if resname not in dependencies:
dependencies[resname] = dict()
dependencies[resname][cardname] = targets
cardinalities[resname] = dict()
cardinalities[resname][cardname] = card['cardinality']
elif cardname not in dependencies[resname]:
dependencies[resname][cardname] = targets
cardinalities[resname][cardname] = card['cardinality']
else:
dependencies[resname][cardname].extend(targets)

# iteratively purge dependencies from non-circular references
for _ in range(30):
# remove targets that point to a resource that is not in dependencies,
# remove cardinalities that have no targets
for res, cards in dependencies.copy().items():
for card, targets in cards.copy().items():
dependencies[res][card] = [target for target in targets if target in dependencies]
if len(dependencies[res][card]) == 0:
del dependencies[res][card]
# remove resources that have no cardinalities
dependencies = {res: cards for res, cards in dependencies.items() if len(cards) > 0}
# remove resources that are not pointed to by any target
all_targets: Set[str] = set()
for cards in dependencies.values():
for trgt in cards.values():
all_targets = all_targets | set(trgt)
dependencies = {res: targets for res, targets in dependencies.items() if res in all_targets}

# check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n
ok_cardinalities = ['0-1', '0-n']
notok_dependencies: dict[str, List[str]] = dict()
for res, cards in dependencies.items():
ontoname, resname = res.split(':')
for card in cards:
# the name of the cardinality could be with prepended onto, only with colon, or without anything
card_without_colon = card.split(':')[1]
card_with_colon = ':' + card_without_colon
card_variations = [card, card_with_colon, card_without_colon]
for card_variation in card_variations:
match = jsonpath_ng.ext.parse(
f'$[?@.name == {ontoname}].resources[?@.name == {resname}].cardinalities[?@.propname == "{card_variation}"]'
).find(ontos)
if len(match) > 0:
break
card_numbers = match[0].value['cardinality']
if card_numbers not in ok_cardinalities:
if res not in notok_dependencies:
notok_dependencies[res] = [card]
else:
notok_dependencies[res].append(card)

if len(notok_dependencies) == 0:
return True
else:
print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
'between resources. This is not a problem in itself, but if you try to upload data that actually '
'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the '
'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a '
'cardinality of 0-1 or 0-n. \n'
'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:')
for _res, _cards in notok_dependencies.items():
print(_res)
for card in _cards:
print(f'\t{card}')
return False

# transform the dependencies into a graph structure
graph = nx.MultiDiGraph()
for start, cards in dependencies.items():
for edge, targets in cards.items():
for target in targets:
graph.add_edge(start, target, edge)

# find elements of circles that have a cardinality of "1" or "1-n"
errors: set[tuple[str, str]] = set()
circles = list(nx.simple_cycles(graph))
for circle in circles:
for index, resource in enumerate(circle):
target = circle[(index+1) % len(circle)]
for property, targets in dependencies[resource].items():
if target in targets:
prop = property
if cardinalities[resource][prop] not in ['0-1', '0-n']:
errors.add((resource, prop))

return sorted(errors, key=lambda x: x[0])
11 changes: 6 additions & 5 deletions requirements.txt
Expand Up @@ -10,14 +10,15 @@ argparse==1.4.0
attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
certifi==2021.10.8
charset-normalizer==2.0.12; python_version >= '3'
click==8.1.2
click==8.1.3
decorator==5.1.1; python_version >= '3.5'
et-xmlfile==1.1.0; python_version >= '3.6'
idna==3.3; python_version >= '3'
isodate==0.6.1
jsonpath-ng==1.5.3
jsonschema==4.4.0
jsonschema==4.5.1
lxml==4.8.0
networkx==2.8
openpyxl==3.0.9
ply==3.11
pyparsing==2.4.7
Expand All @@ -26,7 +27,7 @@ pystrict==1.2
rdflib==6.1.1
requests==2.27.1
rfc3987==1.3.8
setuptools==62.1.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
setuptools==62.2.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
validators==0.18.2
validators==0.19.0
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -20,7 +20,7 @@
"Operating System :: OS Independent",
],
python_requires='>=3.9.0',
install_requires=['argparse==1.4.0', "attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 'certifi==2021.10.8', "charset-normalizer==2.0.12; python_version >= '3'", 'click==8.1.2', "decorator==5.1.1; python_version >= '3.5'", "et-xmlfile==1.1.0; python_version >= '3.6'", "idna==3.3; python_version >= '3'", 'isodate==0.6.1', 'jsonpath-ng==1.5.3', 'jsonschema==4.4.0', 'lxml==4.8.0', 'openpyxl==3.0.9', 'ply==3.11', 'pyparsing==2.4.7', "pyrsistent==0.18.1; python_version >= '3.7'", 'pystrict==1.2', 'rdflib==6.1.1', 'requests==2.27.1', 'rfc3987==1.3.8', "setuptools==62.1.0; python_version >= '3.7'", "six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 'validators==0.18.2'
install_requires=['argparse==1.4.0', "attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 'certifi==2021.10.8', "charset-normalizer==2.0.12; python_version >= '3'", 'click==8.1.3', "decorator==5.1.1; python_version >= '3.5'", "et-xmlfile==1.1.0; python_version >= '3.6'", "idna==3.3; python_version >= '3'", 'isodate==0.6.1', 'jsonpath-ng==1.5.3', 'jsonschema==4.5.1', 'lxml==4.8.0', 'networkx==2.8', 'openpyxl==3.0.9', 'ply==3.11', 'pyparsing==2.4.7', "pyrsistent==0.18.1; python_version >= '3.7'", 'pystrict==1.2', 'rdflib==6.1.1', 'requests==2.27.1', 'rfc3987==1.3.8', "setuptools==62.2.0; python_version >= '3.7'", "six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 'validators==0.19.0'
],
entry_points={
'console_scripts': [
Expand Down
20 changes: 17 additions & 3 deletions test/unittests/test_create_ontology.py
Expand Up @@ -2,14 +2,18 @@
import unittest
import json
from typing import Any
import jsonpath_ng.ext

from knora.dsplib.utils.onto_create_ontology import *
from knora.dsplib.utils.onto_create_ontology import sort_resources, sort_prop_classes
from knora.dsplib.utils.onto_validate import collect_link_properties, identify_problematic_cardinalities


class TestOntoCreation(unittest.TestCase):
with open('testdata/test-onto.json', 'r') as json_file:
json_onto: dict[str, Any] = json.load(json_file)
ontology: dict[str, Any] = json_onto['project']['ontologies'][0]
project: dict[str, Any] = json.load(json_file)
ontology: dict[str, Any] = project['project']['ontologies'][0]
with open('testdata/circular-onto.json', 'r') as json_file:
circular_onto: dict[str, Any] = json.load(json_file)

def test_sort_resources(self) -> None:
"""
Expand Down Expand Up @@ -43,5 +47,15 @@ def test_sort_prop_classes(self) -> None:
self.assertListEqual(unsorted_props, sorted_props)


def test_circular_references_in_onto(self) -> None:
link_properties = collect_link_properties(self.circular_onto)
errors = identify_problematic_cardinalities(self.circular_onto, link_properties)
expected_errors = [
('testonto:AnyResource', 'testonto:linkToTestThing1'),
('testonto:TestThing3', 'testonto:linkToResource')
]
self.assertListEqual(sorted(errors), sorted(expected_errors))


if __name__ == '__main__':
unittest.main()

0 comments on commit ed35902

Please sign in to comment.