dasch-swiss · jnussbaum · Mar 25, 2022 · Feb 25, 2022 · Feb 28, 2022 · Feb 28, 2022
diff --git a/knora/dsplib/models/resource.py b/knora/dsplib/models/resource.py
@@ -25,7 +25,7 @@ class KnoraStandoffXmlEncoder(json.JSONEncoder):
 
     def default(self, obj) -> str:
         if isinstance(obj, KnoraStandoffXml):
-            return '<?xml version="1.0" encoding="UTF-8"?>\n<text>' + obj.getXml() + '</text>'
+            return '<?xml version="1.0" encoding="UTF-8"?>\n<text>' + str(obj) + '</text>'
         elif isinstance(obj, OntoInfo):
-            return obj.iri + "#" if obj.hashtag else ""
+            return obj.iri + ("#" if obj.hashtag else "")  # always keep the IRI, '#' is optional
         return json.JSONEncoder.default(self, obj)
@@ -275,10 +275,9 @@ def toJsonLdObj(self, action: Actions) -> Any:
             tmp['@context'] = self.context
         return tmp
 
-    def create(self):
+    def create(self) -> 'ResourceInstance':
         jsonobj = self.toJsonLdObj(Actions.Create)
         jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
-        # print("jsondata", jsondata)
         result = self._con.post(ResourceInstance.ROUTE, jsondata)
         newinstance = self.clone()
         newinstance._iri = result['@id']
@@ -394,7 +393,7 @@ def _get_baseclass(self, superclasses: list[str]) -> Union[str, None]:
             return self._get_baseclass(gaga.superclasses)
         return None
 
-    def get_resclass(self, prefixedresclass: str) -> Type:
+    def get_resclass_type(self, prefixedresclass: str) -> Type:
         prefix, resclass_name = prefixedresclass.split(':')
         resclass = [x for x in self._ontologies[prefix].resource_classes if x.name == resclass_name][0]
         baseclass = self._get_baseclass(resclass.superclasses)

diff --git a/knora/dsplib/models/sipi.py b/knora/dsplib/models/sipi.py
@@ -1,7 +1,6 @@
 import os
-
 import requests
-
+from typing import Any
 from .helpers import BaseError
 
 
@@ -30,7 +29,7 @@ def __init__(self, sipi_server: str, token: str):
         self.sipi_server = sipi_server
         self.token = token
 
-    def upload_bitstream(self, filepath):
+    def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
         """
         Uploads a bitstream to the Sipi server
 
@@ -45,5 +44,5 @@ def upload_bitstream(self, filepath):
             req = requests.post(self.sipi_server + "/upload?token=" + self.token, files=files)
         on_api_error(req)
         print(f'Uploaded file {filepath}')
-        res = req.json()
+        res: dict[Any, Any] = req.json()
         return res
diff --git a/knora/dsplib/models/value.py b/knora/dsplib/models/value.py
@@ -16,21 +16,36 @@ class KnoraStandoffXml:
     __iriregexp = re.compile(r'IRI:[^:]*:IRI')
     __xmlstr: str
 
-    def __init__(self, xmlstr: str) -> str:
+    def __init__(self, xmlstr: str) -> None:
         self.__xmlstr = str(xmlstr)
 
     def __str__(self) -> str:
         return self.__xmlstr
 
-    def getXml(self) -> str:
-        return self.__xmlstr
-
-    def findall(self) -> Union[list[str], None]:
+    def get_all_iris(self) -> list[str]:
+        """
+        Return all substrings of the XML text that match the pattern 'IRI:[^:]*:IRI'.
+
+        Returns:
+            the list of matches as delivered by re.findall() (empty if there is no match)
+        """
         return self.__iriregexp.findall(self.__xmlstr)
 
     def replace(self, fromStr: str, toStr: str) -> None:
         self.__xmlstr = self.__xmlstr.replace(fromStr, toStr)
 
+    def regex_replace(self, pattern: str, repl: str) -> None:
+        """
+        Replace every match of a regular expression in the XML text, in place.
+
+        Args:
+            pattern: regular expression to search for
+            repl: replacement, in the syntax of re.sub()
+        """
+        # NOTE(review): repr() doubles the backslashes of `pattern`, so a backslash is matched literally
+        # instead of starting a regex escape like \d - confirm that this is intended
+        self.__xmlstr = re.sub(pattern=repr(pattern)[1:-1], repl=repl, string=self.__xmlstr)
+
 
 @strict
 class Value:

diff --git a/knora/dsplib/utils/onto_validate.py b/knora/dsplib/utils/onto_validate.py
@@ -1,14 +1,13 @@
 import json
 import os
-from typing import Dict, Union
-
+import re
+from typing import Any, Union, List, Set
 import jsonschema
-from jsonschema import validate
-
+import jsonpath_ng, jsonpath_ng.ext
 from ..utils.expand_all_lists import expand_lists_from_excel
 
 
-def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool:
+def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLike[Any]']) -> bool:
     """
     Validates an ontology against the knora schema
 
@@ -18,8 +17,8 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool
     Returns:
         True if ontology passed validation, False otherwise
     """
-    data_model = ''
 
+    data_model: dict[Any, Any] = {}
     if isinstance(input_file_or_json, dict):
         data_model = input_file_or_json
     elif os.path.isfile(input_file_or_json):
@@ -38,15 +37,138 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool
 
     # validate the data model against the schema
     current_dir = os.path.dirname(os.path.realpath(__file__))
-
     with open(os.path.join(current_dir, '../schemas/ontology.json')) as s:
         schema = json.load(s)
-
     try:
-        validate(instance=data_model, schema=schema)
+        jsonschema.validate(instance=data_model, schema=schema)
     except jsonschema.exceptions.ValidationError as err:
         print(f'Data model did not pass validation. The error message is: {err.message}\n'
               f'The error occurred at {err.json_path}')
         return False
-    print('Data model is syntactically correct and passed validation.')
-    return True
+
+    # cardinalities check for circular references
+    if check_cardinalities_of_circular_references(data_model):
+        print('Data model is syntactically correct and passed validation.')
+        return True
+    else:
+        return False
+
+
+def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool:
+    """
+    Check that link properties which can form circular references between resources are optional.
+
+    If properties derived from hasLinkTo form a circular reference, they must have the cardinality 0-1
+    or 0-n, because during the xmlupload process, these values are temporarily removed.
+
+    Args:
+        data_model: the project definition as dict; data_model['project']['ontologies'] is inspected
+
+    Returns:
+        True if all hasLinkTo cardinalities that take part in a circular reference are 0-1 or 0-n.
+        Otherwise, the offending cardinalities are printed and False is returned.
+
+    Raises:
+        KeyError: if a key that is accessed directly is missing, e.g. 'project', 'ontologies',
+            'resources' or 'cardinalities'
+    """
+
+    # search the ontology for all properties that are derived from hasLinkTo, store them in a dict, and map
+    # them to their objects (i.e. the resource classes they point to)
+    # example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D':
+    # link_properties = {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}
+    ontos = data_model['project']['ontologies']
+    # both queries are evaluated relative to a single ontology, so it is enough to parse them once
+    link_prop_query = jsonpath_ng.ext.parse('$.properties[?@.super[*] == hasLinkTo]')
+    res_name_query = jsonpath_ng.ext.parse('$.resources[*].name')
+    link_properties: dict[str, List[str]] = dict()
+    for onto in ontos:
+        for match in link_prop_query.find(onto):
+            prop = onto['name'] + ':' + match.value['name']
+            target = match.value['object']
+            if target != 'Resource':
+                # make the target a fully qualified name (with the ontology's name prefixed)
+                target = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', target)
+            link_properties[prop] = [target]
+
+    # in case the object of a property is "Resource", the link can point to any resource class
+    all_res_names: List[str] = list()
+    for onto in ontos:
+        all_res_names.extend(f'{onto["name"]}:{match.value}' for match in res_name_query.find(onto))
+    for prop, targ in link_properties.items():
+        if 'Resource' in targ:
+            link_properties[prop] = all_res_names
+
+    # make a dict that maps resource classes to their hasLinkTo-properties, and to the classes they point to
+    # example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D':
+    # dependencies = {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}}
+    # `declared` collects, for every pair of resource class and link property, the cardinality values
+    # that are found on the way. Like this, the final check can look them up directly, instead of
+    # searching the ontologies once more.
+    dependencies: dict[str, dict[str, List[str]]] = dict()
+    declared: dict[tuple[str, str], List[Any]] = dict()
+    for onto in ontos:
+        for resource in onto['resources']:
+            resname: str = onto['name'] + ':' + resource['name']
+            for card in resource['cardinalities']:
+                # make the cardinality a fully qualified name (with the ontology's name prefixed)
+                cardname = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', card['propname'])
+                if cardname not in link_properties:
+                    continue
+                # Look out: the targets must be copied with `list(...)`. Otherwise, the list stored in
+                # `dependencies` would be the very same object as the one in `link_properties`, and
+                # extending it would modify `link_properties`, too.
+                targets = list(link_properties[cardname])
+                dependencies.setdefault(resname, dict()).setdefault(cardname, list()).extend(targets)
+                declared.setdefault((resname, cardname), list()).append(card.get('cardinality'))
+
+    # Purge dependencies from non-circular references, until a pass doesn't change anything any more.
+    # Every pass that changes something removes at least one target or one resource, so this terminates.
+    changed = True
+    while changed:
+        size_before = len(dependencies)
+        changed = False
+        # remove targets that point to a resource that is not in dependencies,
+        # remove cardinalities that have no targets, and resources that have no cardinalities
+        pruned: dict[str, dict[str, List[str]]] = dict()
+        for res, cards in dependencies.items():
+            kept: dict[str, List[str]] = dict()
+            for card, targets in cards.items():
+                remaining = [target for target in targets if target in dependencies]
+                changed = changed or len(remaining) != len(targets)
+                if remaining:
+                    kept[card] = remaining
+            if kept:
+                pruned[res] = kept
+        # remove resources that are not pointed to by any target
+        all_targets: Set[str] = set()
+        for cards in pruned.values():
+            for trgt in cards.values():
+                all_targets.update(trgt)
+        dependencies = {res: cards for res, cards in pruned.items() if res in all_targets}
+        changed = changed or len(dependencies) != size_before
+
+    # check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n
+    ok_cardinalities = ['0-1', '0-n']
+    notok_dependencies: dict[str, List[str]] = dict()
+    for res, cards in dependencies.items():
+        for card in cards:
+            # if a resource declares the same property more than once, every declaration must be optional
+            if any(number not in ok_cardinalities for number in declared[(res, card)]):
+                notok_dependencies.setdefault(res, list()).append(card)
+
+    if len(notok_dependencies) == 0:
+        return True
+    else:
+        print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
+              'between resources. This is not a problem in itself, but if you try to upload data that actually '
+              'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the '
+              'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a '
+              'cardinality of 0-1 or 0-n. \n'
+              'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:')
+        for _res, _cards in notok_dependencies.items():
+            print(_res)
+            for card in _cards:
+                print(f'\t{card}')
+        return False
+