Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(onto validation): correctly identify circular dependencies (DEV-769) #192

Merged
merged 39 commits into main from wip/dev-769-onto-validation-circular on May 25, 2022
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5e552df
add draft-script to detect circles
May 3, 2022
4e3b71b
add more description to draft
May 3, 2022
68cb973
add more description to draft
May 3, 2022
4c632e4
fix
May 3, 2022
7863ef8
add shortname to resource names
May 3, 2022
b078b97
add circle detection and error message
May 3, 2022
a775d49
outsource a part of a method
May 3, 2022
a199860
add description
May 3, 2022
86d69b8
test script
May 3, 2022
bced991
deactivate load_ontology
May 3, 2022
32c656a
fix in circle finder
May 3, 2022
2467def
fix
May 3, 2022
ea8a563
add description
May 3, 2022
400d113
fix and add formatting
May 4, 2022
856bd18
add draft circle_detection with networkx library
May 4, 2022
fa8b0fa
add methods and description
May 9, 2022
cc23c70
adjust Pipfile
May 9, 2022
3a5b166
adjust setup.py
May 9, 2022
652db32
fix onto_validate.py
May 9, 2022
b472500
add description
May 9, 2022
e28fc67
add description and update onto_validate.py
May 10, 2022
e24562b
rectify onto_validate.py
May 16, 2022
d9cb1a5
adjust error message
May 16, 2022
b46c4b8
Merge branch 'main' into wip/dev-769-onto-validation-circular
jnussbaum May 16, 2022
5095725
delete unused files
jnussbaum May 16, 2022
e80c67c
revert manual change of setup.py
jnussbaum May 16, 2022
380d7c3
pipenv install networkx
jnussbaum May 16, 2022
8be6a0f
refactor code to improve readability
jnussbaum May 16, 2022
3903db9
refactor code to improve readability
jnussbaum May 16, 2022
07cffcf
continue work
jnussbaum May 19, 2022
751294d
working solution
jnussbaum May 23, 2022
f7475d2
improve circle recognition
jnussbaum May 23, 2022
a4d6503
take into account
jnussbaum May 25, 2022
301664f
refactor code to make it testable
jnussbaum May 25, 2022
e02dc46
extend test-onto for circularity-test
jnussbaum May 25, 2022
62c766c
make separate onto for circularity-test
jnussbaum May 25, 2022
cda67c6
improve code testability
jnussbaum May 25, 2022
eb7c29f
save circular as file, to make it reusable
jnussbaum May 25, 2022
f5c482d
Apply suggestions from code review
jnussbaum May 25, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Expand Up @@ -16,6 +16,7 @@ rfc3987 = "*"
pystrict = "*"
openpyxl = "*"
pyparsing = "==2.4.7"
networkx = "*"

[dev-packages]
mkdocs = "*"
Expand Down
206 changes: 107 additions & 99 deletions Pipfile.lock

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions dev-requirements.txt
Expand Up @@ -13,55 +13,55 @@ cerberus==1.3.4
certifi==2021.10.8
chardet==4.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
charset-normalizer==2.0.12; python_version >= '3'
click==8.1.2
click==8.1.3
colorama==0.4.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
distlib==0.3.4
ghp-import==2.0.2
ghp-import==2.1.0
idna==3.3; python_version >= '3'
importlib-metadata==4.11.3; python_version >= '3.7'
iniconfig==1.1.1
jinja2==3.1.1; python_version >= '3.7'
markdown==3.3.6; python_version >= '3.6'
jinja2==3.1.2; python_version >= '3.7'
markdown==3.3.7; python_version >= '3.6'
markupsafe==2.1.1; python_version >= '3.7'
mergedeep==1.3.4; python_version >= '3.6'
mkdocs-include-markdown-plugin==3.3.0
mkdocs-include-markdown-plugin==3.4.0
mkdocs-material-extensions==1.0.3; python_version >= '3.6'
mkdocs-material==8.2.11
mkdocs-material==8.2.15
mkdocs==1.3.0
mypy-extensions==0.4.3
mypy==0.942
numpy==1.22.3; platform_machine != 'aarch64' and platform_machine != 'arm64' and python_version < '3.10'
mypy==0.950
numpy==1.22.3; python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'
orderedmultidict==1.0.1
packaging==20.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pandas==1.4.2
pep517==0.12.0
pip-shims==0.7.0; python_version >= '3.6'
pip==22.0.4; python_version >= '3.7'
pip==22.1; python_version >= '3.7'
pipenv-setup==3.2.0
pipfile==0.0.2
platformdirs==2.5.2; python_version >= '3.7'
plette[validation]==0.2.3; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
pluggy==1.0.0; python_version >= '3.6'
py==1.11.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pycodestyle==2.8.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pygments==2.11.2; python_version >= '3.5'
pymdown-extensions==9.3; python_version >= '3.7'
pygments==2.12.0; python_version >= '3.6'
pymdown-extensions==9.4; python_version >= '3.7'
pyparsing==2.4.7
pytest==7.1.2
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pytz==2022.1
pyyaml-env-tag==0.1; python_version >= '3.6'
pyyaml==6.0; python_version >= '3.6'
requests==2.27.1
requirementslib==1.6.4; python_version >= '3.7'
setuptools==62.1.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'
setuptools==62.2.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
tomli==2.0.1; python_version >= '3.7'
tomlkit==0.10.2; python_version >= '3.6' and python_version < '4'
tomlkit==0.10.2; python_version >= '3.6' and python_version < '4.0'
typing-extensions==4.2.0; python_version >= '3.7'
urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
vistir==0.5.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
watchdog==2.1.7; python_version >= '3.6'
watchdog==2.1.8; python_version >= '3.6'
wheel==0.37.1
zipp==3.8.0; python_version >= '3.7'
158 changes: 83 additions & 75 deletions knora/dsplib/utils/onto_validate.py
@@ -1,9 +1,10 @@
import json
import os
import re
from typing import Any, Union, List, Set
from typing import Any, Union
import jsonschema
import json
import jsonpath_ng, jsonpath_ng.ext
import networkx as nx
from ..utils.expand_all_lists import expand_lists_from_excel


Expand Down Expand Up @@ -56,33 +57,65 @@ def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLik

def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool:
"""
Check if there are properties derived from hasLinkTo that form a circular reference. If so, these
Check whether a data model contains properties derived from hasLinkTo that form a circular reference. If so, these
properties must have the cardinality 0-1 or 0-n, because during the xmlupload process, these values
are temporarily removed.

Args:
data_model: dictionary with a DSP project (as defined in a JSON ontology file)

Returns:
True if no circle was detected, or if all elements of all circles are of cardinality "0-1" or "0-n".
False if there is a circle with at least one element that has a cardinality of "1" or "1-n".
"""

# search the ontology for all properties that are derived from hasLinkTo, store them in a dict, and map
# them to their objects (i.e. the resource classes they point to)
# example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D':
# link_properties = {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}
link_properties = collect_link_properties(data_model)
errors = identify_problematic_cardinalities(data_model, link_properties)

if len(errors) == 0:
return True
else:
print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
'between resources. This is not a problem in itself, but if you try to upload data that actually '
'contains circular references, these "hasLinkTo" properties will be temporarily removed from the '
'affected resources. Therefore, it is necessary that all involved "hasLinkTo" properties have a '
'cardinality of 0-1 or 0-n. \n'
'Please make sure that the following properties have a cardinality of 0-1 or 0-n:')
for error in errors:
print(f'\t- Resource {error[0]}, property {error[1]}')
return False


def collect_link_properties(data_model: dict[Any, Any]) -> dict[str, list[str]]:
"""
map the properties derived from hasLinkTo to the resource classes they point to, for example:
link_properties = {'rosetta:hasImage2D': ['rosetta:Image2D'], ...}
"""
ontos = data_model['project']['ontologies']
link_properties: dict[str, List[str]] = dict()
hasLinkTo_props = {'hasLinkTo', 'isPartOf', 'isRegionOf', 'isAnnotationOf'}
link_properties: dict[str, list[str]] = dict()
for index, onto in enumerate(ontos):
hasLinkTo_matches = jsonpath_ng.ext.parse(
f'$.project.ontologies[{index}].properties[?@.super[*] == hasLinkTo]'
).find(data_model)
prop_obj_pair: dict[str, List[str]] = dict()
hasLinkTo_matches = list()
# look for child-properties down to 5 inheritance levels that are derived from hasLinkTo-properties
for i in range(5):
for hasLinkTo_prop in hasLinkTo_props:
hasLinkTo_matches.extend(jsonpath_ng.ext.parse(
f'$.project.ontologies[{index}].properties[?super[*] == {hasLinkTo_prop}]'
).find(data_model))
# the children found in this round become the parents of the next round
jnussbaum marked this conversation as resolved.
Show resolved Hide resolved
hasLinkTo_props = {x.value['name'] for x in hasLinkTo_matches}
prop_obj_pair: dict[str, list[str]] = dict()
for match in hasLinkTo_matches:
prop = onto['name'] + ':' + match.value['name']
target = match.value['object']
if target != 'Resource':
# make the target a fully qualified name (with the ontology's name prefixed)
target = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', target)
target = re.sub(r'^:([^:]+)$', f'{onto["name"]}:\\1', target)
prop_obj_pair[prop] = [target]
link_properties.update(prop_obj_pair)

# in case the object of a property is "Resource", the link can point to any resource class
all_res_names: List[str] = list()
all_res_names: list[str] = list()
for index, onto in enumerate(ontos):
matches = jsonpath_ng.ext.parse(f'$.resources[*].name').find(onto)
tmp = [f'{onto["name"]}:{match.value}' for match in matches]
Expand All @@ -91,11 +124,19 @@ def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bo
if 'Resource' in targ:
link_properties[prop] = all_res_names

# make a dict that maps resource classes to their hasLinkTo-properties, and to the classes they point to
# example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D':
# dependencies = {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}}
dependencies: dict[str, dict[str, List[str]]] = dict()
for onto in ontos:
return link_properties


def identify_problematic_cardinalities(data_model: dict[Any, Any], link_properties: dict[str, list[str]]) -> list[tuple[str, str]]:
"""
make an error list of all properties that are part of a circle but have a cardinality of "1" or "1-n"
jnussbaum marked this conversation as resolved.
Show resolved Hide resolved
"""
# make 2 dicts of the following form:
# dependencies = {'rosetta:Text': {'rosetta:hasImage2D': ['rosetta:Image2D'], ...}}
# cardinalities = {'rosetta:Text': {'rosetta:hasImage2D': '0-1', ...}}
dependencies: dict[str, dict[str, list[str]]] = dict()
cardinalities: dict[str, dict[str, str]] = dict()
for onto in data_model['project']['ontologies']:
for resource in onto['resources']:
resname: str = onto['name'] + ':' + resource['name']
for card in resource['cardinalities']:
Expand All @@ -111,64 +152,31 @@ def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bo
if resname not in dependencies:
dependencies[resname] = dict()
dependencies[resname][cardname] = targets
cardinalities[resname] = dict()
cardinalities[resname][cardname] = card['cardinality']
elif cardname not in dependencies[resname]:
dependencies[resname][cardname] = targets
cardinalities[resname][cardname] = card['cardinality']
else:
dependencies[resname][cardname].extend(targets)

# iteratively purge dependencies from non-circular references
for _ in range(30):
# remove targets that point to a resource that is not in dependencies,
# remove cardinalities that have no targets
for res, cards in dependencies.copy().items():
for card, targets in cards.copy().items():
dependencies[res][card] = [target for target in targets if target in dependencies]
if len(dependencies[res][card]) == 0:
del dependencies[res][card]
# remove resources that have no cardinalities
dependencies = {res: cards for res, cards in dependencies.items() if len(cards) > 0}
# remove resources that are not pointed to by any target
all_targets: Set[str] = set()
for cards in dependencies.values():
for trgt in cards.values():
all_targets = all_targets | set(trgt)
dependencies = {res: targets for res, targets in dependencies.items() if res in all_targets}

# check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n
ok_cardinalities = ['0-1', '0-n']
notok_dependencies: dict[str, List[str]] = dict()
for res, cards in dependencies.items():
ontoname, resname = res.split(':')
for card in cards:
# the name of the cardinality could be with prepended onto, only with colon, or without anything
card_without_colon = card.split(':')[1]
card_with_colon = ':' + card_without_colon
card_variations = [card, card_with_colon, card_without_colon]
for card_variation in card_variations:
match = jsonpath_ng.ext.parse(
f'$[?@.name == {ontoname}].resources[?@.name == {resname}].cardinalities[?@.propname == "{card_variation}"]'
).find(ontos)
if len(match) > 0:
break
card_numbers = match[0].value['cardinality']
if card_numbers not in ok_cardinalities:
if res not in notok_dependencies:
notok_dependencies[res] = [card]
else:
notok_dependencies[res].append(card)

if len(notok_dependencies) == 0:
return True
else:
print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
'between resources. This is not a problem in itself, but if you try to upload data that actually '
'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the '
'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a '
'cardinality of 0-1 or 0-n. \n'
'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:')
for _res, _cards in notok_dependencies.items():
print(_res)
for card in _cards:
print(f'\t{card}')
return False

# transform the dependencies into a graph structure
graph = nx.MultiDiGraph()
for start, cards in dependencies.items():
for edge, targets in cards.items():
for target in targets:
graph.add_edge(start, target, edge)

# find elements of circles that have a cardinality of "1" or "1-n"
errors: set[tuple[str, str]] = set()
circles = list(nx.simple_cycles(graph))
for circle in circles:
for index, resource in enumerate(circle):
target = circle[(index+1) % len(circle)]
for property, targets in dependencies[resource].items():
if target in targets:
prop = property
if cardinalities[resource][prop] not in ['0-1', '0-n']:
errors.add((resource, prop))

return sorted(errors, key=lambda x: x[0])
11 changes: 6 additions & 5 deletions requirements.txt
Expand Up @@ -10,14 +10,15 @@ argparse==1.4.0
attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
certifi==2021.10.8
charset-normalizer==2.0.12; python_version >= '3'
click==8.1.2
click==8.1.3
decorator==5.1.1; python_version >= '3.5'
et-xmlfile==1.1.0; python_version >= '3.6'
idna==3.3; python_version >= '3'
isodate==0.6.1
jsonpath-ng==1.5.3
jsonschema==4.4.0
jsonschema==4.5.1
lxml==4.8.0
networkx==2.8
openpyxl==3.0.9
ply==3.11
pyparsing==2.4.7
Expand All @@ -26,7 +27,7 @@ pystrict==1.2
rdflib==6.1.1
requests==2.27.1
rfc3987==1.3.8
setuptools==62.1.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'
setuptools==62.2.0; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
validators==0.18.2
validators==0.19.0
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -20,7 +20,7 @@
"Operating System :: OS Independent",
],
python_requires='>=3.9.0',
install_requires=['argparse==1.4.0', "attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 'certifi==2021.10.8', "charset-normalizer==2.0.12; python_version >= '3'", 'click==8.1.2', "decorator==5.1.1; python_version >= '3.5'", "et-xmlfile==1.1.0; python_version >= '3.6'", "idna==3.3; python_version >= '3'", 'isodate==0.6.1', 'jsonpath-ng==1.5.3', 'jsonschema==4.4.0', 'lxml==4.8.0', 'openpyxl==3.0.9', 'ply==3.11', 'pyparsing==2.4.7', "pyrsistent==0.18.1; python_version >= '3.7'", 'pystrict==1.2', 'rdflib==6.1.1', 'requests==2.27.1', 'rfc3987==1.3.8', "setuptools==62.1.0; python_version >= '3.7'", "six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 'validators==0.18.2'
install_requires=['argparse==1.4.0', "attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 'certifi==2021.10.8', "charset-normalizer==2.0.12; python_version >= '3'", 'click==8.1.3', "decorator==5.1.1; python_version >= '3.5'", "et-xmlfile==1.1.0; python_version >= '3.6'", "idna==3.3; python_version >= '3'", 'isodate==0.6.1', 'jsonpath-ng==1.5.3', 'jsonschema==4.5.1', 'lxml==4.8.0', 'networkx==2.8', 'openpyxl==3.0.9', 'ply==3.11', 'pyparsing==2.4.7', "pyrsistent==0.18.1; python_version >= '3.7'", 'pystrict==1.2', 'rdflib==6.1.1', 'requests==2.27.1', 'rfc3987==1.3.8', "setuptools==62.2.0; python_version >= '3.7'", "six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 'validators==0.19.0'
],
entry_points={
'console_scripts': [
Expand Down
20 changes: 17 additions & 3 deletions test/unittests/test_create_ontology.py
Expand Up @@ -2,14 +2,18 @@
import unittest
import json
from typing import Any
import jsonpath_ng.ext

from knora.dsplib.utils.onto_create_ontology import *
from knora.dsplib.utils.onto_create_ontology import sort_resources, sort_prop_classes
from knora.dsplib.utils.onto_validate import collect_link_properties, identify_problematic_cardinalities


class TestOntoCreation(unittest.TestCase):
with open('testdata/test-onto.json', 'r') as json_file:
json_onto: dict[str, Any] = json.load(json_file)
ontology: dict[str, Any] = json_onto['project']['ontologies'][0]
project: dict[str, Any] = json.load(json_file)
ontology: dict[str, Any] = project['project']['ontologies'][0]
with open('testdata/circular-onto.json', 'r') as json_file:
circular_onto: dict[str, Any] = json.load(json_file)

def test_sort_resources(self) -> None:
"""
Expand Down Expand Up @@ -43,5 +47,15 @@ def test_sort_prop_classes(self) -> None:
self.assertListEqual(unsorted_props, sorted_props)


def test_circular_references_in_onto(self) -> None:
link_properties = collect_link_properties(self.circular_onto)
errors = identify_problematic_cardinalities(self.circular_onto, link_properties)
expected_errors = [
('testonto:AnyResource', 'testonto:linkToTestThing1'),
('testonto:TestThing3', 'testonto:linkToResource')
]
self.assertListEqual(sorted(errors), sorted(expected_errors))


if __name__ == '__main__':
unittest.main()