From 75a444ff5ef586238139ac2f091876b22ef2a671 Mon Sep 17 00:00:00 2001
From: Johannes Nussbaum <39048939+jnussbaum@users.noreply.github.com>
Date: Fri, 25 Mar 2022 09:18:03 +0100
Subject: [PATCH] feat(xmlupload): allow circular references (DEV-577) (#165)
---
knora/dsplib/models/resource.py | 7 +-
knora/dsplib/models/sipi.py | 7 +-
knora/dsplib/models/value.py | 10 +-
knora/dsplib/utils/onto_validate.py | 144 ++++++++++++-
knora/dsplib/utils/xml_upload.py | 322 +++++++++++++++++++++++-----
test/e2e/test_resource.py | 4 +-
test/e2e/test_tools.py | 2 +-
testdata/test-data.xml | 69 +++++-
testdata/test-onto.json | 16 ++
9 files changed, 495 insertions(+), 86 deletions(-)
diff --git a/knora/dsplib/models/resource.py b/knora/dsplib/models/resource.py
index 671cbcb57..6427e887f 100644
--- a/knora/dsplib/models/resource.py
+++ b/knora/dsplib/models/resource.py
@@ -25,7 +25,7 @@ class KnoraStandoffXmlEncoder(json.JSONEncoder):
def default(self, obj) -> str:
if isinstance(obj, KnoraStandoffXml):
- return '\n' + obj.getXml() + ''
+ return '\n' + str(obj) + ''
elif isinstance(obj, OntoInfo):
return obj.iri + "#" if obj.hashtag else ""
return json.JSONEncoder.default(self, obj)
@@ -275,10 +275,9 @@ def toJsonLdObj(self, action: Actions) -> Any:
tmp['@context'] = self.context
return tmp
- def create(self):
+ def create(self) -> 'ResourceInstance':
jsonobj = self.toJsonLdObj(Actions.Create)
jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
- # print("jsondata", jsondata)
result = self._con.post(ResourceInstance.ROUTE, jsondata)
newinstance = self.clone()
newinstance._iri = result['@id']
@@ -394,7 +393,7 @@ def _get_baseclass(self, superclasses: list[str]) -> Union[str, None]:
return self._get_baseclass(gaga.superclasses)
return None
- def get_resclass(self, prefixedresclass: str) -> Type:
+ def get_resclass_type(self, prefixedresclass: str) -> Type:
prefix, resclass_name = prefixedresclass.split(':')
resclass = [x for x in self._ontologies[prefix].resource_classes if x.name == resclass_name][0]
baseclass = self._get_baseclass(resclass.superclasses)
diff --git a/knora/dsplib/models/sipi.py b/knora/dsplib/models/sipi.py
index c8a25b717..3cb026a4d 100644
--- a/knora/dsplib/models/sipi.py
+++ b/knora/dsplib/models/sipi.py
@@ -1,7 +1,6 @@
import os
-
import requests
-
+from typing import Any
from .helpers import BaseError
@@ -30,7 +29,7 @@ def __init__(self, sipi_server: str, token: str):
self.sipi_server = sipi_server
self.token = token
- def upload_bitstream(self, filepath):
+ def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
"""
Uploads a bitstream to the Sipi server
@@ -45,5 +44,5 @@ def upload_bitstream(self, filepath):
req = requests.post(self.sipi_server + "/upload?token=" + self.token, files=files)
on_api_error(req)
print(f'Uploaded file {filepath}')
- res = req.json()
+ res: dict[Any, Any] = req.json()
return res
diff --git a/knora/dsplib/models/value.py b/knora/dsplib/models/value.py
index c3af54204..bf98020c7 100644
--- a/knora/dsplib/models/value.py
+++ b/knora/dsplib/models/value.py
@@ -16,21 +16,21 @@ class KnoraStandoffXml:
__iriregexp = re.compile(r'IRI:[^:]*:IRI')
__xmlstr: str
- def __init__(self, xmlstr: str) -> str:
+ def __init__(self, xmlstr: str) -> None:
self.__xmlstr = str(xmlstr)
def __str__(self) -> str:
return self.__xmlstr
- def getXml(self) -> str:
- return self.__xmlstr
-
- def findall(self) -> Union[list[str], None]:
+ def get_all_iris(self) -> Optional[list[str]]:
return self.__iriregexp.findall(self.__xmlstr)
def replace(self, fromStr: str, toStr: str) -> None:
self.__xmlstr = self.__xmlstr.replace(fromStr, toStr)
+    def regex_replace(self, pattern: str, repl: str) -> None:
+        """
+        Replace all matches of a pattern in the XML string, using re.sub().
+
+        The pattern is not handed to re.sub() as is: it is first converted with repr() and stripped of the
+        surrounding quotes. As a consequence, a backslash in the pattern arrives doubled at re.sub().
+
+        Args:
+            pattern: the pattern to search for
+            repl: the replacement, as understood by re.sub()
+        """
+        prepared_pattern = repr(pattern)[1:-1]
+        self.__xmlstr = re.sub(pattern=prepared_pattern, repl=repl, string=self.__xmlstr)
+
@strict
class Value:
diff --git a/knora/dsplib/utils/onto_validate.py b/knora/dsplib/utils/onto_validate.py
index 6367a2ffb..5b97046ce 100644
--- a/knora/dsplib/utils/onto_validate.py
+++ b/knora/dsplib/utils/onto_validate.py
@@ -1,14 +1,13 @@
import json
import os
-from typing import Dict, Union
-
+import re
+from typing import Any, Union, List, Set
import jsonschema
-from jsonschema import validate
-
+import jsonpath_ng, jsonpath_ng.ext
from ..utils.expand_all_lists import expand_lists_from_excel
-def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool:
+def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLike[Any]']) -> bool:
"""
Validates an ontology against the knora schema
@@ -18,8 +17,8 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool
Returns:
True if ontology passed validation, False otherwise
"""
- data_model = ''
+ data_model: dict[Any, Any] = {}
if isinstance(input_file_or_json, dict):
data_model = input_file_or_json
elif os.path.isfile(input_file_or_json):
@@ -38,15 +37,138 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool
# validate the data model against the schema
current_dir = os.path.dirname(os.path.realpath(__file__))
-
with open(os.path.join(current_dir, '../schemas/ontology.json')) as s:
schema = json.load(s)
-
try:
- validate(instance=data_model, schema=schema)
+ jsonschema.validate(instance=data_model, schema=schema)
except jsonschema.exceptions.ValidationError as err:
print(f'Data model did not pass validation. The error message is: {err.message}\n'
f'The error occurred at {err.json_path}')
return False
- print('Data model is syntactically correct and passed validation.')
- return True
+
+ # cardinalities check for circular references
+ if check_cardinalities_of_circular_references(data_model):
+ print('Data model is syntactically correct and passed validation.')
+ return True
+ else:
+ return False
+
+
+def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool:
+    """
+    Check that all link properties that can form a circular reference have the cardinality 0-1 or 0-n.
+
+    Properties derived from hasLinkTo can form circles between resource classes. During the xmlupload process,
+    such links are temporarily removed, which is only possible if the cardinality allows the link to be absent.
+
+    Args:
+        data_model: the data model; its ontologies are read from data_model['project']['ontologies']
+
+    Returns:
+        True if every cardinality that is part of a circle is 0-1 or 0-n. Otherwise, the offending
+        cardinalities are printed, and False is returned.
+    """
+
+    # search the ontology for all properties that are derived from hasLinkTo, store them in a dict, and map
+    # them to their objects (i.e. the resource classes they point to)
+    # example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D':
+    # link_properties = {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}
+    ontos = data_model['project']['ontologies']
+    link_properties: dict[str, List[str]] = dict()
+    for index, onto in enumerate(ontos):
+        hasLinkTo_matches = jsonpath_ng.ext.parse(
+            f'$.project.ontologies[{index}].properties[?@.super[*] == hasLinkTo]'
+        ).find(data_model)
+        for prop_match in hasLinkTo_matches:
+            prop = onto['name'] + ':' + prop_match.value['name']
+            target = prop_match.value['object']
+            if target != 'Resource':
+                # make the target a fully qualified name (with the ontology's name prefixed)
+                target = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', target)
+            link_properties[prop] = [target]
+
+    # in case the object of a property is "Resource", the link can point to any resource class
+    all_res_names: List[str] = list()
+    for onto in ontos:
+        res_matches = jsonpath_ng.ext.parse('$.resources[*].name').find(onto)
+        all_res_names.extend(f'{onto["name"]}:{res_match.value}' for res_match in res_matches)
+    for prop, targ in link_properties.items():
+        if 'Resource' in targ:
+            link_properties[prop] = all_res_names
+
+    # make a dict that maps resource classes to their hasLinkTo-properties, and to the classes they point to
+    # example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D':
+    # dependencies = {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}}
+    dependencies: dict[str, dict[str, List[str]]] = dict()
+    for onto in ontos:
+        for resource in onto['resources']:
+            resname: str = onto['name'] + ':' + resource['name']
+            for card in resource['cardinalities']:
+                # make the cardinality a fully qualified name (with the ontology's name prefixed)
+                cardname = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', card['propname'])
+                if cardname in link_properties:
+                    # the targets are copied into a list owned by `dependencies`, so that extending it
+                    # never modifies the lists stored in `link_properties`
+                    dependencies.setdefault(resname, dict()).setdefault(cardname, []).extend(
+                        link_properties[cardname]
+                    )
+
+    # purge dependencies from non-circular references, until a pass does not remove anything any more
+    while True:
+        snapshot = {res: {card: list(targets) for card, targets in cards.items()}
+                    for res, cards in dependencies.items()}
+        # remove targets that point to a resource that is not in dependencies,
+        # remove cardinalities that have no targets
+        for cards in dependencies.values():
+            for card, targets in list(cards.items()):
+                remaining = [target for target in targets if target in dependencies]
+                if remaining:
+                    cards[card] = remaining
+                else:
+                    del cards[card]
+        # remove resources that have no cardinalities
+        dependencies = {res: cards for res, cards in dependencies.items() if len(cards) > 0}
+        # remove resources that are not pointed to by any target
+        all_targets: Set[str] = set()
+        for cards in dependencies.values():
+            for trgt in cards.values():
+                all_targets.update(trgt)
+        dependencies = {res: cards for res, cards in dependencies.items() if res in all_targets}
+        if dependencies == snapshot:
+            break
+
+    # check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n
+    ok_cardinalities = ['0-1', '0-n']
+    notok_dependencies: dict[str, List[str]] = dict()
+    for res, cards in dependencies.items():
+        ontoname, resname = res.split(':')
+        for card in cards:
+            # the name of the cardinality could be with prepended onto, only with colon, or without anything
+            card_without_colon = card.split(':')[1]
+            card_with_colon = ':' + card_without_colon
+            card_matches = []
+            for card_variation in [card, card_with_colon, card_without_colon]:
+                card_matches = jsonpath_ng.ext.parse(
+                    f'$[?@.name == {ontoname}].resources[?@.name == {resname}].cardinalities[?@.propname == "{card_variation}"]'
+                ).find(ontos)
+                if len(card_matches) > 0:
+                    break
+            if len(card_matches) == 0:
+                # none of the spellings was found: report it instead of failing with an IndexError
+                print(f'WARNING: Could not determine the cardinality of "{card}" in resource "{res}"')
+                continue
+            card_numbers = card_matches[0].value['cardinality']
+            if card_numbers not in ok_cardinalities:
+                notok_dependencies.setdefault(res, []).append(card)
+
+    if len(notok_dependencies) == 0:
+        return True
+    else:
+        print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references '
+              'between resources. This is not a problem in itself, but if you try to upload data that actually '
+              'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the '
+              'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a '
+              'cardinality of 0-1 or 0-n. \n'
+              'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:')
+        for _res, _cards in notok_dependencies.items():
+            print(_res)
+            for card in _cards:
+                print(f'\t{card}')
+        return False
+
diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py
index 5ae0f947e..708770046 100644
--- a/knora/dsplib/utils/xml_upload.py
+++ b/knora/dsplib/utils/xml_upload.py
@@ -8,7 +8,8 @@
import uuid
from datetime import datetime
from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, Union, cast, Tuple
+from urllib.parse import quote_plus
from lxml import etree
@@ -17,7 +18,7 @@
from knora.dsplib.models.helpers import BaseError
from knora.dsplib.models.permission import Permissions
from knora.dsplib.models.project import Project
-from knora.dsplib.models.resource import ResourceInstanceFactory, ResourceInstance
+from knora.dsplib.models.resource import ResourceInstanceFactory, ResourceInstance, KnoraStandoffXmlEncoder
from knora.dsplib.models.sipi import Sipi
from knora.dsplib.models.value import KnoraStandoffXml
@@ -29,7 +30,7 @@ class XmlError(BaseException):
def __init__(self, msg: str):
self._message = msg
- def __str__(self):
+ def __str__(self) -> str:
return 'XML-ERROR: ' + self._message
@@ -122,7 +123,7 @@ def __init__(self, node: etree.Element, val_type: str, listname: Optional[str] =
xmlstr = xmlstr.replace('', '')
xmlstr = xmlstr.replace('', '')
self._value = KnoraStandoffXml(xmlstr)
- tmp_id_list = self._value.findall()
+ tmp_id_list = self._value.get_all_iris()
if tmp_id_list:
refs = set()
for tmp_id in tmp_id_list:
@@ -139,11 +140,19 @@ def value(self) -> Union[str, KnoraStandoffXml]:
"""The actual value of the value instance"""
return self._value
+@value.setter
+def value(self, value: Union[str, KnoraStandoffXml]) -> None:
+"""Set the actual value of the value instance"""
+self._value = value
+
@property
def resrefs(self) -> Optional[list[str]]:
"""List of resource references"""
return self._resrefs
+@resrefs.setter
+def resrefs(self, resrefs: Optional[list[str]]) -> None:
+"""Set the list of resource references"""
+self._resrefs = resrefs
+
@property
def comment(self) -> str:
"""Comment about the value"""
@@ -313,6 +322,14 @@ def bitstream(self) -> Optional[XMLBitstream]:
"""The bitstream object belonging to the resource"""
return self._bitstream
+@property
+def properties(self) -> list[XMLProperty]:
+"""The list of properties belonging to the resource"""
+return self._properties
+
+@properties.setter
+def properties(self, new_properties: list[XMLProperty]) -> None:
+"""Replace the list of properties belonging to the resource"""
+self._properties = new_properties
+
def print(self) -> None:
"""Prints the resource and its attributes."""
print(f'Resource: id={self._id}, restype: {self._restype}, label: {self._label}')
@@ -321,6 +338,22 @@ def print(self) -> None:
for prop in self._properties:
prop.print()
+    def get_props_with_links(self) -> list[XMLProperty]:
+        """
+        Collect the properties of this resource that link to another resource.
+
+        A property counts as link property if it is a resptr property, or if it is a text property with at
+        least one value that has resource references (standoff links).
+        """
+        def has_outgoing_link(prop: XMLProperty) -> bool:
+            if prop.valtype == 'resptr':
+                return True
+            return prop.valtype == 'text' and any(value.resrefs for value in prop.values)
+
+        return [prop for prop in self._properties if has_outgoing_link(prop)]
+
def get_resptrs(self) -> list[str]:
"""
Get a list of all resource id's that are referenced by this resource
@@ -332,16 +365,20 @@ def get_resptrs(self) -> list[str]:
for prop in self._properties:
if prop.valtype == 'resptr':
for value in prop.values:
- resptrs.append(value.value)
+ resptrs.append(str(value.value))
elif prop.valtype == 'text':
for value in prop.values:
- if value.resrefs is not None:
+ if value.resrefs:
resptrs.extend(value.resrefs)
return resptrs
- def get_propvals(self, resiri_lookup: dict[str, str], permissions_lookup: dict[str, Permissions]) -> dict[str, Permissions]:
+ def get_propvals(
+ self,
+ resiri_lookup: dict[str, str],
+ permissions_lookup: dict[str, Permissions]
+ ) -> dict[str, Union[list[Union[str, dict[str, str]]], str, dict[str, str]]]:
"""
- Get a dictionary of the property names and their values belonging to a resource
+ Get a dictionary of the property names and their values. Replace the internal ids by their IRI first.
Args:
resiri_lookup: Is used to solve internal unique id's of resources to real IRI's
@@ -363,7 +400,7 @@ def get_propvals(self, resiri_lookup: dict[str, str], permissions_lookup: dict[s
v = value.value # if we do not find the id, we assume it's a valid knora IRI
elif prop.valtype == 'text':
if isinstance(value.value, KnoraStandoffXml):
- iri_refs = value.value.findall()
+ iri_refs = value.value.get_all_iris()
for iri_ref in iri_refs:
res_id = iri_ref.split(':')[1]
iri = resiri_lookup.get(res_id)
@@ -495,37 +532,44 @@ def get_permission_instance(self) -> Permissions:
permissions.add(allow.permission, allow.group)
return permissions
- def __str__(self):
+ def __str__(self) -> str:
allow_str: list[str] = []
for allow in self._allows:
allow_str.append("{} {}".format(allow.permission, allow.group))
return '|'.join(allow_str)
- def print(self):
+ def print(self) -> None:
"""Prints the permission set"""
print('Permission: ', self._id)
for a in self._allows:
a.print()
-def do_sort_order(resources: list[XMLResource], verbose) -> list[XMLResource]:
+def remove_circular_references(resources: list[XMLResource], verbose: bool) -> \
+ tuple[list[XMLResource],
+ dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+ dict[XMLResource, dict[XMLProperty, list[str]]]
+ ]:
"""
- Sorts a list of resources.
-
- Resources that reference other resources are added after the referenced resources. The method will report circular
- references and exit with an error if there are any unresolvable references.
+ Temporarily removes problematic resource-references from a list of resources. A reference is problematic if
+ it creates a circle (circular references).
Args:
- resources: list of resources to sort
+ resources: list of resources that possibly contain circular references
verbose: verbose output if True
Returns:
- sorted list of resources
+ list: list of cleaned resources
+ stashed_xml_texts: dict with the stashed XML texts
+ stashed_resptr_props: dict with the stashed resptr-props
"""
if verbose:
print("Checking resources for unresolvable references...")
+ stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]] = {}
+ stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}
+
# sort the resources according to outgoing resptrs
ok_resources: list[XMLResource] = []
nok_resources: list[XMLResource] = []
@@ -550,20 +594,77 @@ def do_sort_order(resources: list[XMLResource], verbose) -> list[XMLResource]:
nok_resources.append(resource)
resources = nok_resources
if len(nok_resources) == nok_len:
- print("ERROR Unable to resolve all resptr dependencies.")
- for res in nok_resources:
- unresolvable_resptrs = []
- for resptr_id in res.get_resptrs():
- if resptr_id not in ok_res_ids:
- unresolvable_resptrs.append(resptr_id)
- print(f"\tResource '{res.id}' has unresolvable resptrs to {unresolvable_resptrs}")
- exit(1)
+ # there are circular references. go through all problematic resources, and stash the problematic references.
+ nok_resources, ok_res_ids, ok_resources, stashed_xml_texts, stashed_resptr_props = stash_circular_references(
+ nok_resources,
+ ok_res_ids,
+ ok_resources,
+ stashed_xml_texts,
+ stashed_resptr_props
+ )
nok_len = len(nok_resources)
nok_resources = []
cnt += 1
if verbose:
- print(f'{cnt}. ordering finished.')
- return ok_resources
+ print(f'{cnt}. ordering pass finished.')
+ return ok_resources, stashed_xml_texts, stashed_resptr_props
+
+
+def stash_circular_references(
+    nok_resources: list[XMLResource],
+    ok_res_ids: list[str],
+    ok_resources: list[XMLResource],
+    stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+    stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]]
+) -> Tuple[
+    list[XMLResource],
+    list[str],
+    list[XMLResource],
+    dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+    dict[XMLResource, dict[XMLProperty, list[str]]]
+]:
+    """
+    Stash the links of the given resources that point to resources which are not in ok_res_ids.
+
+    XML texts with such links are replaced by a hash and stored in stashed_xml_texts under that hash. Resptr
+    values with such a target are removed and stored in stashed_resptr_props. A property that has no values
+    left is removed from its resource. Afterwards, every resource is moved from nok_resources to ok_resources.
+    All arguments are modified in place and returned.
+
+    Args:
+        nok_resources: resources with links that could not be resolved
+        ok_res_ids: ids of the resources that are already resolved
+        ok_resources: resources that are already resolved
+        stashed_xml_texts: the XML texts stashed so far
+        stashed_resptr_props: the resptr values stashed so far
+
+    Returns:
+        the five arguments, in the same order
+
+    Raises:
+        BaseError: if a property with links is neither a text nor a resptr property
+    """
+    for res in list(nok_resources):
+        for link_prop in res.get_props_with_links():
+            if link_prop.valtype == 'text':
+                for value in link_prop.values:
+                    if not value.resrefs or all(_id in ok_res_ids for _id in value.resrefs):
+                        # this text has no links, or only links to resources that are already resolved
+                        continue
+                    # stash this XML text, replace it by its hash, and remove the
+                    # problematic resrefs from the XMLValue's resrefs list
+                    value_hash = str(hash(f'{value.value}{datetime.now()}'))
+                    texts_of_prop = stashed_xml_texts.setdefault(res, {}).setdefault(link_prop, {})
+                    texts_of_prop[value_hash] = cast(KnoraStandoffXml, value.value)
+                    value.value = KnoraStandoffXml(value_hash)
+                    value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids]
+            elif link_prop.valtype == 'resptr':
+                # iterate over a copy, because values are removed from the original list
+                for value in list(link_prop.values):
+                    if value.value in ok_res_ids:
+                        continue
+                    # value.value is the id of the target resource. stash it, then delete it
+                    resptrs_of_prop = stashed_resptr_props.setdefault(res, {}).setdefault(link_prop, [])
+                    resptrs_of_prop.append(str(value.value))
+                    link_prop.values.remove(value)
+            else:
+                raise BaseError(f'ERROR in remove_circular_references(): link_prop.valtype is '
+                                f'neither text nor resptr.')
+
+            if not link_prop.values:
+                # if all values of a link property have been stashed, the property needs to be removed
+                res.properties.remove(link_prop)
+
+        ok_resources.append(res)
+        ok_res_ids.append(res.id)
+        nok_resources.remove(res)
+
+    return nok_resources, ok_res_ids, ok_resources, stashed_xml_texts, stashed_resptr_props
def validate_xml_against_schema(input_file: str, schema_file: str) -> bool:
@@ -586,6 +687,7 @@ def validate_xml_against_schema(input_file: str, schema_file: str) -> bool:
print("The input data file cannot be uploaded due to the following validation error(s):")
for error in xmlschema.error_log:
print(f" Line {error.line}: {error.message}")
+ return False
def convert_ark_v0_to_resource_iri(ark: str) -> str:
@@ -628,6 +730,83 @@ def convert_ark_v0_to_resource_iri(ark: str) -> str:
return "http://rdfh.ch/" + project_id + "/" + dsp_uuid
+def update_xml_texts(
+    resource: XMLResource,
+    res_iri: str,
+    link_props: dict[XMLProperty, dict[str, KnoraStandoffXml]],
+    res_iri_lookup: dict[str, str],
+    con: Connection,
+    verbose: bool
+) -> None:
+    """
+    Replace the placeholder hashes of an uploaded resource by the stashed XML texts.
+
+    The resource is fetched from the server. Every text value whose content is one of the stashed hashes is
+    overwritten with the stashed XML text, after the internal ids in its links have been replaced by IRIs.
+
+    Args:
+        resource: the resource whose XML texts were stashed
+        res_iri: the IRI of this resource on the server
+        link_props: the stashed XML texts of this resource, per property and per hash
+        res_iri_lookup: mapping from internal ids to IRIs
+        con: connection to the server
+        verbose: if True, successful updates are printed
+    """
+    existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}')
+    context = existing_resource['@context']
+    for link_prop, hash_to_value in link_props.items():
+        values = existing_resource[link_prop.name]
+        if not isinstance(values, list):
+            values = [values, ]
+        for value in values:
+            xmltext = value.get("knora-api:textValueAsXml")
+            if not xmltext:
+                continue
+            # strip the XML declaration and the surrounding <text> element, so that only the hash remains
+            _hash = re.sub(r'<\?xml.+>(\n)?(<text>)(.+)(<\/text>)', r'\3', xmltext)
+            if _hash not in hash_to_value:
+                continue
+            new_xmltext = hash_to_value[_hash]
+            for _id, _iri in res_iri_lookup.items():
+                # literal replacement: an id must not be interpreted as a regular expression
+                new_xmltext.replace(f'href="IRI:{_id}:IRI"', f'href="{_iri}"')
+            val_iri = value['@id']
+            jsonobj = {
+                "@id": res_iri,
+                "@type": resource.restype,
+                link_prop.name: {
+                    "@id": val_iri,
+                    "@type": "knora-api:TextValue",
+                    "knora-api:textValueAsXml": new_xmltext,
+                    "knora-api:textValueHasMapping": {
+                        '@id': 'http://rdfh.ch/standoff/mappings/StandardMapping'
+                    }
+                },
+                "@context": context
+            }
+            jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
+            new_value = con.put(path='/v2/values', jsondata=jsondata)
+            if not new_value:
+                print(f'ERROR while updating the xml text of {link_prop.name} of resource {resource.id}')
+            elif verbose:
+                print(f'  Successfully updated Property: {link_prop.name} Type: XML Text\n'
+                      f'  Value: {new_xmltext}')
+
+
+def update_resptr_props(
+    resource: XMLResource,
+    res_iri: str,
+    prop_2_resptrs: dict[XMLProperty, list[str]],
+    res_iri_lookup: dict[str, str],
+    con: Connection,
+    verbose: bool
+) -> None:
+    """
+    Create the stashed link values of an uploaded resource.
+
+    For every stashed resptr, a link value that points to the IRI of the target is posted to the server.
+    A resptr whose target has no entry in res_iri_lookup is reported and skipped, so that the remaining
+    links of the resource are still created.
+
+    Args:
+        resource: the resource whose resptr values were stashed
+        res_iri: the IRI of this resource on the server
+        prop_2_resptrs: the stashed resptr values (internal ids of the targets) of this resource, per property
+        res_iri_lookup: mapping from internal ids to IRIs
+        con: connection to the server
+        verbose: if True, successful updates are printed
+    """
+    # the resource is only fetched in order to reuse its JSON-LD context
+    existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}')
+    context = existing_resource['@context']
+    for link_prop, resptrs in prop_2_resptrs.items():
+        for resptr in resptrs:
+            target_iri = res_iri_lookup.get(resptr)
+            if target_iri is None:
+                print(f'ERROR while updating the resptr prop of {link_prop.name} of resource {resource.id}: '
+                      f'no IRI is known for the target "{resptr}"')
+                continue
+            jsonobj = {
+                '@id': res_iri,
+                '@type': resource.restype,
+                f'{link_prop.name}Value': {
+                    '@type': 'knora-api:LinkValue',
+                    'knora-api:linkValueHasTargetIri': {
+                        '@id': target_iri
+                    }
+                },
+                '@context': context
+            }
+            jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '))
+            new_value = con.post(path='/v2/values', jsondata=jsondata)
+            if not new_value:
+                print(f'ERROR while updating the resptr prop of {link_prop.name} of resource {resource.id}')
+            elif verbose:
+                print(f'  Successfully updated Property: {link_prop.name} Type: Link property\n'
+                      f'  Value: {resptr}')
+
+
def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: str, sipi: str, verbose: bool,
validate_only: bool, incremental: bool) -> None:
"""
@@ -698,11 +877,11 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
elif child.tag == "resource":
resources.append(XMLResource(child, default_ontology))
- # sort the resources (resources which do not link to others come first) but only if not an incremental upload
+    # temporarily remove circular references, but only if not an incremental upload.
+    # The stashes start out empty, because they are read after the upload loop in any case,
+    # also if the upload is incremental.
+    stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]] = {}
+    stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}
     if not incremental:
-        resources = do_sort_order(resources, verbose)
+        resources, stashed_xml_texts, stashed_resptr_props = remove_circular_references(resources, verbose)
- sipi = Sipi(sipi, con.get_token())
+ sipi_server = Sipi(sipi, con.get_token())
# get the project information and project ontology from the server
project = ResourceInstanceFactory(con, shortcode)
@@ -713,9 +892,9 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
permissions_lookup[key] = perm.get_permission_instance()
# create a dictionary to look up resource classes
- res_classes: dict[str, type] = {}
+ resclass_name_2_type: dict[str, type] = {}
for res_class_name in project.get_resclass_names():
- res_classes[res_class_name] = project.get_resclass(res_class_name)
+ resclass_name_2_type[res_class_name] = project.get_resclass_type(res_class_name)
res_iri_lookup: dict[str, str] = {}
@@ -730,7 +909,7 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
resource_bitstream = None
if resource.bitstream:
- img = sipi.upload_bitstream(os.path.join(imgdir, resource.bitstream.value))
+ img = sipi_server.upload_bitstream(os.path.join(imgdir, resource.bitstream.value))
internal_file_name_bitstream = img['uploadedFiles'][0]['internalFilename']
resource_bitstream = resource.get_bitstream(internal_file_name_bitstream, permissions_lookup)
@@ -738,27 +917,74 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
try:
# create a resource instance (ResourceInstance) from the given resource in the XML (XMLResource)
- instance: ResourceInstance = res_classes[resource.restype](con=con,
- label=resource.label,
- iri=resource_iri,
- permissions=permissions_tmp,
- bitstream=resource_bitstream,
- values=resource.get_propvals(res_iri_lookup,
- permissions_lookup)).create()
+ resclass_type = resclass_name_2_type[resource.restype]
+ properties = resource.get_propvals(res_iri_lookup, permissions_lookup)
+ resclass_instance: ResourceInstance = resclass_type(
+ con=con,
+ label=resource.label,
+ iri=resource_iri,
+ permissions=permissions_tmp,
+ bitstream=resource_bitstream,
+ values=properties
+ )
+ resclass_instance = resclass_instance.create()
except BaseError as err:
- print(
- f"ERROR while trying to create resource '{resource.label}' ({resource.id}). The error message was: {err.message}")
+ print(f"ERROR while trying to create resource '{resource.label}' ({resource.id}). "
+ f"The error message was: {err.message}")
failed_uploads.append(resource.id)
continue
-
except Exception as exception:
- print(
- f"ERROR while trying to create resource '{resource.label}' ({resource.id}). The error message was: {exception}")
+ print(f"EXCEPTION while trying to create resource '{resource.label}' ({resource.id}). "
+ f"The exception message was: {exception}")
failed_uploads.append(resource.id)
continue
- res_iri_lookup[resource.id] = instance.iri
- print(f"Created resource '{instance.label}' ({resource.id}) with IRI '{instance.iri}'")
+ res_iri_lookup[resource.id] = resclass_instance.iri
+ print(f"Created resource '{resclass_instance.label}' ({resource.id}) with IRI '{resclass_instance.iri}'")
+
+    # update the resources with the stashed XML texts
+    if len(stashed_xml_texts) > 0:
+        print('Update the stashed XML texts...')
+    for resource, link_props in stashed_xml_texts.items():
+        print(f'Update XML text(s) of resource "{resource.id}"...')
+        res_iri = res_iri_lookup.get(resource.id)
+        if res_iri is None:
+            # a resource that could not be created has no entry in res_iri_lookup, so it cannot be updated
+            print(f'ERROR: Cannot update the XML text(s) of resource "{resource.id}", because it has no IRI')
+            continue
+        try:
+            update_xml_texts(
+                resource=resource,
+                res_iri=res_iri,
+                link_props=link_props,
+                res_iri_lookup=res_iri_lookup,
+                con=con,
+                verbose=verbose
+            )
+        except BaseError as err:
+            print(f'BaseError while updating an XML text of resource "{resource.id}": {err.message}')
+        except Exception as exception:
+            print(f'Exception while updating an XML text of resource "{resource.id}": {exception}')
+
+    # update the resources with the stashed resptrs
+    if len(stashed_resptr_props) > 0:
+        print('Update the stashed resptrs...')
+    for resource, prop_2_resptrs in stashed_resptr_props.items():
+        print(f'Update resptrs of resource "{resource.id}"...')
+        res_iri = res_iri_lookup.get(resource.id)
+        if res_iri is None:
+            # a resource that could not be created has no entry in res_iri_lookup, so it cannot be updated
+            print(f'ERROR: Cannot update the resptrs of resource "{resource.id}", because it has no IRI')
+            continue
+        try:
+            update_resptr_props(
+                resource=resource,
+                res_iri=res_iri,
+                prop_2_resptrs=prop_2_resptrs,
+                res_iri_lookup=res_iri_lookup,
+                con=con,
+                verbose=verbose
+            )
+        except BaseError as err:
+            print(f'BaseError while updating a resptr prop of resource "{resource.id}": {err.message}')
+        except Exception as exception:
+            print(f'Exception while updating a resptr prop of resource "{resource.id}": {exception}')
# write mapping of internal IDs to IRIs to file with timestamp
timestamp_now = datetime.now()
diff --git a/test/e2e/test_resource.py b/test/e2e/test_resource.py
index 8202e1544..eccb747fa 100644
--- a/test/e2e/test_resource.py
+++ b/test/e2e/test_resource.py
@@ -24,7 +24,7 @@ def test_Resource_create(self) -> None:
factory = ResourceInstanceFactory(self.con, 'anything')
# get a blue_thing resource class
- blue_thing = factory.get_resclass('anything:BlueThing')
+ blue_thing = factory.get_resclass_type('anything:BlueThing')
a_blue_thing = blue_thing(con=self.con,
label='BlueThing',
@@ -50,7 +50,7 @@ def test_Resource_create(self) -> None:
self.assertEqual(new_blue_thing.value("anything:hasDecimal"), 3.14159)
self.assertEqual(new_blue_thing.value("anything:hasText"), "Dies ist ein einfacher Text")
- thing_picture = factory.get_resclass('anything:ThingPicture')
+ thing_picture = factory.get_resclass_type('anything:ThingPicture')
sipi = Sipi('http://0.0.0.0:1024', self.con.get_token())
img = sipi.upload_bitstream('testdata/bitstreams/TEMP11.TIF')
file_ref = img['uploadedFiles'][0]['internalFilename']
diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py
index 46e397add..ad8a2e5c8 100644
--- a/test/e2e/test_tools.py
+++ b/test/e2e/test_tools.py
@@ -172,7 +172,7 @@ def test_xml_upload(self) -> None:
server=self.server,
user=self.user,
password='test',
- imgdir='testdata/bitstreams',
+ imgdir='.',
sipi='http://0.0.0.0:1024',
verbose=False,
validate_only=False,
diff --git a/testdata/test-data.xml b/testdata/test-data.xml
index 068520ff5..fc65bc42c 100644
--- a/testdata/test-data.xml
+++ b/testdata/test-data.xml
@@ -36,6 +36,9 @@
Dies ist ein TestThing ohne Angabe von permissions
+
+ obj_0001
+
Nochmals ein einfacher Text
- This isbold andstringtext!
+
+ This isbold andstringtext! It contains links to all resources:
+ obj_0000
+ obj_0001
+ obj_0002
+ obj_0003
+ obj_0004
+ obj_0005
+ obj_0006
+ obj_0007
+ obj_0008
+ obj_0009
+ obj_0010
+ obj_0011
+
https://dasch.swiss
@@ -95,11 +112,41 @@
+
+
+ Dies ist ein einfacher Text ohne Markup
+ Nochmals ein einfacher Text
+
+
+
+ This isbold andstringtext! It contains links to all resources:
+ obj_0000
+ obj_0001
+ obj_0002
+ obj_0003
+ obj_0004
+ obj_0005
+ obj_0006
+ obj_0007
+ obj_0008
+ obj_0009
+ obj_0010
+ obj_0011
+
+
+
+ false
+
+
+
- TEMP11.TIF
+ testdata/bitstreams/TEMP11.TIF
This is a Imagething
@@ -118,7 +165,7 @@
restype=":PartOfThing"
id="obj_0004"
permissions="res-default">
- TEMP12.TIF
+ testdata/bitstreams/TEMP12.TIF
obj_0003
@@ -131,7 +178,7 @@
restype=":PartOfThing"
id="obj_0005"
permissions="res-default">
- TEMP13.TIF
+ testdata/bitstreams/TEMP13.TIF
obj_0003
@@ -144,7 +191,7 @@
restype=":PartOfThing"
id="obj_0006"
permissions="res-default">
- TEMP14.TIF
+ testdata/bitstreams/TEMP14.TIF
obj_0003
@@ -157,7 +204,7 @@
restype=":DocumentThing"
id="obj_0007"
permissions="res-default">
- test.pdf
+ testdata/bitstreams/test.pdf
This is a Documentthing
@@ -167,7 +214,7 @@
restype=":TextThing"
id="obj_0008"
permissions="res-default">
- test.csv
+ testdata/bitstreams/test.csv
This is a Textthing as CSV
@@ -177,7 +224,7 @@
restype=":ZipThing"
id="obj_0009"
permissions="res-default">
- test.zip
+ testdata/bitstreams/test.zip
This is a Zipthing
@@ -187,19 +234,19 @@
restype=":AudioThing"
id="obj_0010"
permissions="res-default">
- clara.wav
+ testdata/bitstreams/clara.wav
This is a Audiothing
-
+
Some text
-
+
Some text
diff --git a/testdata/test-onto.json b/testdata/test-onto.json
index 1f722e37f..ae30e90fd 100644
--- a/testdata/test-onto.json
+++ b/testdata/test-onto.json
@@ -366,6 +366,17 @@
"en": "Another thing"
},
"gui_element": "Searchbox"
+ },
+ {
+ "name": "hasTestThing",
+ "super": [
+ "hasLinkTo"
+ ],
+ "object": ":TestThing",
+ "labels": {
+ "en": "Another thing"
+ },
+ "gui_element": "Searchbox"
}
],
"resources": [
@@ -487,6 +498,11 @@
"propname": ":hasText",
"gui_order": 1,
"cardinality": "1"
+ },
+ {
+ "propname": ":hasTestThing",
+ "gui_order": 2,
+ "cardinality": "0-n"
}
]
},