Skip to content

Commit

Permalink
feat(xmlupload): allow circular references (DEV-577) (#165)
Browse files Browse the repository at this point in the history
  • Loading branch information
jnussbaum committed Mar 25, 2022
1 parent 9a9e5f0 commit 75a444f
Show file tree
Hide file tree
Showing 9 changed files with 495 additions and 86 deletions.
7 changes: 3 additions & 4 deletions knora/dsplib/models/resource.py
Expand Up @@ -25,7 +25,7 @@ class KnoraStandoffXmlEncoder(json.JSONEncoder):

def default(self, obj) -> str:
    """
    Serialise the project-specific objects that the standard JSON encoder cannot handle.

    Args:
        obj: the object that json.dumps() could not serialise on its own

    Returns:
        for a KnoraStandoffXml: its string form wrapped in an XML declaration and a <text> element;
        for an OntoInfo: its IRI, followed by "#" if its hashtag flag is set

    Any other object is handed to json.JSONEncoder.default().
    """
    if isinstance(obj, KnoraStandoffXml):
        return '<?xml version="1.0" encoding="UTF-8"?>\n<text>' + str(obj) + '</text>'
    if isinstance(obj, OntoInfo):
        # The parentheses matter: without them the conditional expression covers the whole
        # concatenation, and an OntoInfo without hashtag would be serialised as "" instead of its IRI.
        return obj.iri + ("#" if obj.hashtag else "")
    return json.JSONEncoder.default(self, obj)
Expand Down Expand Up @@ -275,10 +275,9 @@ def toJsonLdObj(self, action: Actions) -> Any:
tmp['@context'] = self.context
return tmp

def create(self):
def create(self) -> 'ResourceInstance':
jsonobj = self.toJsonLdObj(Actions.Create)
jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
# print("jsondata", jsondata)
result = self._con.post(ResourceInstance.ROUTE, jsondata)
newinstance = self.clone()
newinstance._iri = result['@id']
Expand Down Expand Up @@ -394,7 +393,7 @@ def _get_baseclass(self, superclasses: list[str]) -> Union[str, None]:
return self._get_baseclass(gaga.superclasses)
return None

def get_resclass(self, prefixedresclass: str) -> Type:
def get_resclass_type(self, prefixedresclass: str) -> Type:
prefix, resclass_name = prefixedresclass.split(':')
resclass = [x for x in self._ontologies[prefix].resource_classes if x.name == resclass_name][0]
baseclass = self._get_baseclass(resclass.superclasses)
Expand Down
7 changes: 3 additions & 4 deletions knora/dsplib/models/sipi.py
@@ -1,7 +1,6 @@
import os

import requests

from typing import Any
from .helpers import BaseError


Expand Down Expand Up @@ -30,7 +29,7 @@ def __init__(self, sipi_server: str, token: str):
self.sipi_server = sipi_server
self.token = token

def upload_bitstream(self, filepath):
def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
"""
Uploads a bitstream to the Sipi server
Expand All @@ -45,5 +44,5 @@ def upload_bitstream(self, filepath):
req = requests.post(self.sipi_server + "/upload?token=" + self.token, files=files)
on_api_error(req)
print(f'Uploaded file {filepath}')
res = req.json()
res: dict[Any, Any] = req.json()
return res
10 changes: 5 additions & 5 deletions knora/dsplib/models/value.py
Expand Up @@ -16,21 +16,21 @@ class KnoraStandoffXml:
__iriregexp = re.compile(r'IRI:[^:]*:IRI')
__xmlstr: str

def __init__(self, xmlstr: str) -> str:
def __init__(self, xmlstr: str) -> None:
self.__xmlstr = str(xmlstr)

def __str__(self) -> str:
    """
    Return the stored XML string.
    """
    return self.__xmlstr

def getXml(self) -> str:
    """
    Return the stored XML string (the same value that __str__ returns).
    """
    return self.__xmlstr

def findall(self) -> Union[list[str], None]:
def get_all_iris(self) -> Optional[list[str]]:
return self.__iriregexp.findall(self.__xmlstr)

def replace(self, fromStr: str, toStr: str) -> None:
    """
    Replace every occurrence of a literal substring in the stored XML string (in place).

    Args:
        fromStr: substring to search for
        toStr: text that is inserted instead of each occurrence
    """
    self.__xmlstr = self.__xmlstr.replace(fromStr, toStr)

def regex_replace(self, pattern: str, repl: str) -> None:
    """
    Replace every match of a pattern in the stored XML string (in place), using re.sub().

    Args:
        pattern: pattern to search for; it is passed through repr() and stripped of the surrounding
            quote characters before it is given to re.sub()
        repl: replacement, handed to re.sub() unchanged
    """
    # NOTE(review): repr() escapes backslashes, so a regex escape such as \d in `pattern` reaches
    # re.sub() as \\d (a literal backslash followed by "d") -- confirm that this is intended.
    self.__xmlstr = re.sub(pattern=repr(pattern)[1:-1], repl=repl, string=self.__xmlstr)


@strict
class Value:
Expand Down
144 changes: 133 additions & 11 deletions knora/dsplib/utils/onto_validate.py
@@ -1,14 +1,13 @@
import json
import os
from typing import Dict, Union

import re
from typing import Any, Union, List, Set
import jsonschema
from jsonschema import validate

import jsonpath_ng, jsonpath_ng.ext
from ..utils.expand_all_lists import expand_lists_from_excel


def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool:
def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLike[Any]']) -> bool:
"""
Validates an ontology against the knora schema
Expand All @@ -18,8 +17,8 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool
Returns:
True if ontology passed validation, False otherwise
"""
data_model = ''

data_model: dict[Any, Any] = {}
if isinstance(input_file_or_json, dict):
data_model = input_file_or_json
elif os.path.isfile(input_file_or_json):
Expand All @@ -38,15 +37,138 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool

# validate the data model against the schema
current_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(current_dir, '../schemas/ontology.json')) as s:
schema = json.load(s)

try:
validate(instance=data_model, schema=schema)
jsonschema.validate(instance=data_model, schema=schema)
except jsonschema.exceptions.ValidationError as err:
print(f'Data model did not pass validation. The error message is: {err.message}\n'
f'The error occurred at {err.json_path}')
return False
print('Data model is syntactically correct and passed validation.')
return True

# cardinalities check for circular references
if check_cardinalities_of_circular_references(data_model):
print('Data model is syntactically correct and passed validation.')
return True
else:
return False


def _qualify_name(onto_name: str, name: str) -> str:
    """Prefix `name` with `onto_name` if it is written as 'name' or ':name'; return it unchanged otherwise."""
    bare = name[1:] if name.startswith(':') else name
    if bare and ':' not in bare:
        return f'{onto_name}:{bare}'
    return name


def _collect_link_properties(ontos: list[Any]) -> dict[str, list[str]]:
    """Map every property that has 'hasLinkTo' among its supers to the resource classes it can point to."""
    # example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D':
    # link_properties = {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}
    link_properties: dict[str, list[str]] = {}
    for onto in ontos:
        for prop in onto.get('properties', []):
            supers = prop.get('super', [])
            if isinstance(supers, str):
                supers = [supers]
            if 'hasLinkTo' not in supers:
                continue
            target = prop['object']
            if target != 'Resource':
                # make the target a fully qualified name (with the ontology's name prefixed)
                target = _qualify_name(onto['name'], target)
            link_properties[f"{onto['name']}:{prop['name']}"] = [target]

    # in case the object of a property is "Resource", the link can point to any resource class
    all_res_names = [
        f"{onto['name']}:{resource['name']}"
        for onto in ontos
        for resource in onto.get('resources', [])
    ]
    for prop_name, targets in link_properties.items():
        if 'Resource' in targets:
            link_properties[prop_name] = list(all_res_names)
    return link_properties


def _collect_link_dependencies(
    ontos: list[Any],
    link_properties: dict[str, list[str]]
) -> dict[str, dict[str, list[str]]]:
    """Map every resource class to its link cardinalities, and those to the classes they point to."""
    # example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D':
    # dependencies = {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}}
    dependencies: dict[str, dict[str, list[str]]] = {}
    for onto in ontos:
        for resource in onto.get('resources', []):
            resname = f"{onto['name']}:{resource['name']}"
            # a resource without cardinalities simply has no link dependencies
            for card in resource.get('cardinalities', []):
                # make the cardinality a fully qualified name (with the ontology's name prefixed)
                cardname = _qualify_name(onto['name'], card['propname'])
                if cardname in link_properties:
                    # extend() copies the targets, so the lists in `link_properties` are never shared
                    dependencies.setdefault(resname, {}).setdefault(cardname, []).extend(link_properties[cardname])
    return dependencies


def _purge_non_circular(dependencies: dict[str, dict[str, list[str]]]) -> dict[str, dict[str, list[str]]]:
    """Repeatedly drop resources that cannot be part of a circle, until nothing changes any more."""
    while True:
        pruned: dict[str, dict[str, list[str]]] = {}
        for res, cards in dependencies.items():
            # remove targets that point to a resource that is not in dependencies,
            # remove cardinalities that have no targets, remove resources that have no cardinalities
            kept = {card: [trgt for trgt in targets if trgt in dependencies] for card, targets in cards.items()}
            kept = {card: targets for card, targets in kept.items() if targets}
            if kept:
                pruned[res] = kept
        # remove resources that are not pointed to by any target
        all_targets = {trgt for cards in pruned.values() for targets in cards.values() for trgt in targets}
        pruned = {res: cards for res, cards in pruned.items() if res in all_targets}
        if pruned == dependencies:
            # fixpoint reached; every round before removed at least one entry, so the loop terminates
            return pruned
        dependencies = pruned


def _find_cardinality(ontos: list[Any], res: str, card: str) -> Union[str, None]:
    """Return the cardinality that resource `res` declares for the property `card`, or None if there is none."""
    ontoname, resname = res.split(':')
    declared = [
        entry
        for onto in ontos if onto.get('name') == ontoname
        for resource in onto.get('resources', []) if resource.get('name') == resname
        for entry in resource.get('cardinalities', [])
    ]
    # the name of the cardinality could be with prepended onto, only with colon, or without anything
    card_without_colon = card.split(':')[1]
    for card_variation in (card, ':' + card_without_colon, card_without_colon):
        for entry in declared:
            if entry.get('propname') == card_variation:
                return entry['cardinality']
    return None


def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool:
    """
    Check if there are properties derived from hasLinkTo that form a circular reference. If so, these
    properties must have the cardinality 0-1 or 0-n, because during the xmlupload process, these values
    are temporarily removed.

    Args:
        data_model: the data model; its ontologies are read from data_model['project']['ontologies']

    Returns:
        True if every cardinality that is involved in a circular reference is 0-1 or 0-n,
        False otherwise (in this case, the offending cardinalities are printed)
    """
    ontos = data_model['project']['ontologies']
    link_properties = _collect_link_properties(ontos)
    dependencies = _purge_non_circular(_collect_link_dependencies(ontos, link_properties))

    # check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n
    ok_cardinalities = ['0-1', '0-n']
    notok_dependencies: dict[str, list[str]] = {}
    for res, cards in dependencies.items():
        for card in cards:
            card_numbers = _find_cardinality(ontos, res, card)
            if card_numbers is None:
                # cannot happen for names that were collected from these very declarations
                continue
            if card_numbers not in ok_cardinalities:
                notok_dependencies.setdefault(res, []).append(card)

    if not notok_dependencies:
        return True

    print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
          'between resources. This is not a problem in itself, but if you try to upload data that actually '
          'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the '
          'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a '
          'cardinality of 0-1 or 0-n. \n'
          'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:')
    for _res, _cards in notok_dependencies.items():
        print(_res)
        for card in _cards:
            print(f'\t{card}')
    return False

0 comments on commit 75a444f

Please sign in to comment.