Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(xmlupload): allow circular references (DEV-577) #165

Merged
merged 19 commits into from Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions knora/dsplib/models/resource.py
Expand Up @@ -25,7 +25,7 @@ class KnoraStandoffXmlEncoder(json.JSONEncoder):

def default(self, obj) -> str:
if isinstance(obj, KnoraStandoffXml):
return '<?xml version="1.0" encoding="UTF-8"?>\n<text>' + obj.getXml() + '</text>'
return '<?xml version="1.0" encoding="UTF-8"?>\n<text>' + str(obj) + '</text>'
elif isinstance(obj, OntoInfo):
return obj.iri + "#" if obj.hashtag else ""
return json.JSONEncoder.default(self, obj)
Expand Down Expand Up @@ -275,10 +275,9 @@ def toJsonLdObj(self, action: Actions) -> Any:
tmp['@context'] = self.context
return tmp

def create(self):
def create(self) -> 'ResourceInstance':
jsonobj = self.toJsonLdObj(Actions.Create)
jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
# print("jsondata", jsondata)
result = self._con.post(ResourceInstance.ROUTE, jsondata)
newinstance = self.clone()
newinstance._iri = result['@id']
Expand Down Expand Up @@ -394,7 +393,7 @@ def _get_baseclass(self, superclasses: list[str]) -> Union[str, None]:
return self._get_baseclass(gaga.superclasses)
return None

def get_resclass(self, prefixedresclass: str) -> Type:
def get_resclass_type(self, prefixedresclass: str) -> Type:
jnussbaum marked this conversation as resolved.
Show resolved Hide resolved
prefix, resclass_name = prefixedresclass.split(':')
resclass = [x for x in self._ontologies[prefix].resource_classes if x.name == resclass_name][0]
baseclass = self._get_baseclass(resclass.superclasses)
Expand Down
7 changes: 3 additions & 4 deletions knora/dsplib/models/sipi.py
@@ -1,7 +1,6 @@
import os

import requests

from typing import Any
from .helpers import BaseError


Expand Down Expand Up @@ -30,7 +29,7 @@ def __init__(self, sipi_server: str, token: str):
self.sipi_server = sipi_server
self.token = token

def upload_bitstream(self, filepath):
def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
"""
Uploads a bitstream to the Sipi server

Expand All @@ -45,5 +44,5 @@ def upload_bitstream(self, filepath):
req = requests.post(self.sipi_server + "/upload?token=" + self.token, files=files)
on_api_error(req)
print(f'Uploaded file {filepath}')
res = req.json()
res: dict[Any, Any] = req.json()
jnussbaum marked this conversation as resolved.
Show resolved Hide resolved
return res
10 changes: 5 additions & 5 deletions knora/dsplib/models/value.py
Expand Up @@ -16,21 +16,21 @@ class KnoraStandoffXml:
__iriregexp = re.compile(r'IRI:[^:]*:IRI')
__xmlstr: str

def __init__(self, xmlstr: str) -> str:
def __init__(self, xmlstr: str) -> None:
self.__xmlstr = str(xmlstr)

def __str__(self) -> str:
return self.__xmlstr

def getXml(self) -> str:
return self.__xmlstr

def findall(self) -> Union[list[str], None]:
def get_all_iris(self) -> Optional[list[str]]:
return self.__iriregexp.findall(self.__xmlstr)

def replace(self, fromStr: str, toStr: str) -> None:
self.__xmlstr = self.__xmlstr.replace(fromStr, toStr)

def regex_replace(self, pattern: str, repl: str) -> None:
self.__xmlstr = re.sub(pattern=repr(pattern)[1:-1], repl=repl, string=self.__xmlstr)


@strict
class Value:
Expand Down
144 changes: 133 additions & 11 deletions knora/dsplib/utils/onto_validate.py
@@ -1,14 +1,13 @@
import json
import os
from typing import Dict, Union

import re
from typing import Any, Union, List, Set
import jsonschema
from jsonschema import validate

import jsonpath_ng, jsonpath_ng.ext
from ..utils.expand_all_lists import expand_lists_from_excel


def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool:
def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLike[Any]']) -> bool:
"""
Validates an ontology against the knora schema
jnussbaum marked this conversation as resolved.
Show resolved Hide resolved

Expand All @@ -18,8 +17,8 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool
Returns:
True if ontology passed validation, False otherwise
"""
data_model = ''

data_model: dict[Any, Any] = {}
jnussbaum marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(input_file_or_json, dict):
data_model = input_file_or_json
elif os.path.isfile(input_file_or_json):
Expand All @@ -38,15 +37,138 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool

# validate the data model against the schema
current_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(current_dir, '../schemas/ontology.json')) as s:
schema = json.load(s)

try:
validate(instance=data_model, schema=schema)
jsonschema.validate(instance=data_model, schema=schema)
except jsonschema.exceptions.ValidationError as err:
print(f'Data model did not pass validation. The error message is: {err.message}\n'
f'The error occurred at {err.json_path}')
return False
print('Data model is syntactically correct and passed validation.')
return True

# cardinalities check for circular references
if check_cardinalities_of_circular_references(data_model):
print('Data model is syntactically correct and passed validation.')
return True
else:
return False


def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool:
    """
    Check that link properties which may take part in a circular reference are optional.

    Properties derived from hasLinkTo can form circular references between resource classes. During
    the xmlupload process such values are temporarily removed, so the involved cardinalities must be
    0-1 or 0-n.

    Args:
        data_model: the ontology definition; `data_model['project']['ontologies']` is read

    Returns:
        True if every cardinality that remains after removing the non-circular links is 0-1 or 0-n,
        False otherwise (the offending resources and cardinalities are printed)
    """
    ontos = data_model['project']['ontologies']
    link_properties = _collect_link_properties(ontos)
    dependencies = _collect_link_dependencies(ontos, link_properties)
    dependencies = _keep_only_circular_dependencies(dependencies)

    # check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n
    ok_cardinalities = ('0-1', '0-n')
    notok_dependencies: dict[str, list[str]] = {}
    for res, cards in dependencies.items():
        ontoname, resname = res.split(':')
        declared = _declared_cardinalities(ontos, ontoname, resname)
        for card in cards:
            # the name of the cardinality could be with prepended onto, only with colon, or without anything
            card_without_colon = card.split(':')[1]
            card_variations = (card, ':' + card_without_colon, card_without_colon)
            card_numbers = None
            for card_variation in card_variations:
                found = [entry for entry in declared if entry.get('propname') == card_variation]
                if found:
                    card_numbers = found[0]['cardinality']
                    break
            # A cardinality that cannot be found (card_numbers is None) cannot be confirmed as
            # optional, so it is reported as well instead of raising an IndexError.
            if card_numbers not in ok_cardinalities:
                notok_dependencies.setdefault(res, []).append(card)

    if not notok_dependencies:
        return True

    print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
          'between resources. This is not a problem in itself, but if you try to upload data that actually '
          'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the '
          'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a '
          'cardinality of 0-1 or 0-n. \n'
          'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:')
    for _res, _cards in notok_dependencies.items():
        print(_res)
        for card in _cards:
            print(f'\t{card}')
    return False


def _qualify_name(name: str, onto_name: str) -> str:
    """Prefix `name` with `onto_name` if it has no prefix ('x') or only a colon (':x')."""
    return re.sub(r'^:?([^:]+)$', lambda found: f'{onto_name}:{found.group(1)}', name)


def _collect_link_properties(ontos: list[Any]) -> dict[str, list[str]]:
    """
    Map every property that lists 'hasLinkTo' in its 'super' to the resource classes it can point to.

    Example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D':
    {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}
    """
    link_properties: dict[str, list[str]] = {}
    for onto in ontos:
        for prop in onto.get('properties', []):
            supers = prop.get('super', [])
            if isinstance(supers, str):
                supers = [supers]
            if 'hasLinkTo' not in supers:
                continue
            prop_name = onto['name'] + ':' + prop['name']
            target = prop['object']
            if target != 'Resource':
                # make the target a fully qualified name (with the ontology's name prefixed)
                target = _qualify_name(target, onto['name'])
            link_properties[prop_name] = [target]

    # in case the object of a property is "Resource", the link can point to any resource class
    all_res_names = [
        f'{onto["name"]}:{resource["name"]}'
        for onto in ontos
        for resource in onto.get('resources', [])
        if 'name' in resource
    ]
    return {
        prop: list(all_res_names) if 'Resource' in targets else targets
        for prop, targets in link_properties.items()
    }


def _collect_link_dependencies(
    ontos: list[Any],
    link_properties: dict[str, list[str]]
) -> dict[str, dict[str, list[str]]]:
    """
    Map resource classes to their link properties, and those to the classes they point to.

    Example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D':
    {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}}
    """
    dependencies: dict[str, dict[str, list[str]]] = {}
    for onto in ontos:
        for resource in onto.get('resources', []):
            resname = onto['name'] + ':' + resource['name']
            for card in resource.get('cardinalities', []):
                # make the cardinality a fully qualified name (with the ontology's name prefixed)
                cardname = _qualify_name(card['propname'], onto['name'])
                if cardname in link_properties:
                    # extend a list owned by `dependencies`, so that the lists stored in
                    # `link_properties` are never aliased and never modified
                    dependencies.setdefault(resname, {}).setdefault(cardname, []).extend(link_properties[cardname])
    return dependencies


def _keep_only_circular_dependencies(
    dependencies: dict[str, dict[str, list[str]]]
) -> dict[str, dict[str, list[str]]]:
    """
    Repeatedly drop links that cannot be part of a circular reference, until nothing changes.

    A resource is kept only if it still links to a remaining resource and is itself still the target
    of a remaining link. Every pass can only remove entries, so the loop terminates. Running to a
    fixpoint (instead of a fixed number of passes) also purges arbitrarily long non-circular chains.
    """
    while True:
        pruned: dict[str, dict[str, list[str]]] = {}
        for res, cards in dependencies.items():
            # remove targets that point to a resource that is not in dependencies,
            # remove cardinalities that have no targets
            kept_cards = {}
            for card, targets in cards.items():
                kept_targets = [target for target in targets if target in dependencies]
                if kept_targets:
                    kept_cards[card] = kept_targets
            # remove resources that have no cardinalities
            if kept_cards:
                pruned[res] = kept_cards
        # remove resources that are not pointed to by any target
        all_targets = {target for cards in pruned.values() for targets in cards.values() for target in targets}
        pruned = {res: cards for res, cards in pruned.items() if res in all_targets}
        if pruned == dependencies:
            return pruned
        dependencies = pruned


def _declared_cardinalities(ontos: list[Any], ontoname: str, resname: str) -> list[Any]:
    """Return the cardinality entries declared on resource `resname` of ontology `ontoname`."""
    return [
        card
        for onto in ontos
        if onto.get('name') == ontoname
        for resource in onto.get('resources', [])
        if resource.get('name') == resname
        for card in resource.get('cardinalities', [])
    ]