diff --git a/knora/dsplib/models/resource.py b/knora/dsplib/models/resource.py index 671cbcb57..6427e887f 100644 --- a/knora/dsplib/models/resource.py +++ b/knora/dsplib/models/resource.py @@ -25,7 +25,7 @@ class KnoraStandoffXmlEncoder(json.JSONEncoder): def default(self, obj) -> str: if isinstance(obj, KnoraStandoffXml): - return '\n' + obj.getXml() + '' + return '\n' + str(obj) + '' elif isinstance(obj, OntoInfo): return obj.iri + "#" if obj.hashtag else "" return json.JSONEncoder.default(self, obj) @@ -275,10 +275,9 @@ def toJsonLdObj(self, action: Actions) -> Any: tmp['@context'] = self.context return tmp - def create(self): + def create(self) -> 'ResourceInstance': jsonobj = self.toJsonLdObj(Actions.Create) jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder) - # print("jsondata", jsondata) result = self._con.post(ResourceInstance.ROUTE, jsondata) newinstance = self.clone() newinstance._iri = result['@id'] @@ -394,7 +393,7 @@ def _get_baseclass(self, superclasses: list[str]) -> Union[str, None]: return self._get_baseclass(gaga.superclasses) return None - def get_resclass(self, prefixedresclass: str) -> Type: + def get_resclass_type(self, prefixedresclass: str) -> Type: prefix, resclass_name = prefixedresclass.split(':') resclass = [x for x in self._ontologies[prefix].resource_classes if x.name == resclass_name][0] baseclass = self._get_baseclass(resclass.superclasses) diff --git a/knora/dsplib/models/sipi.py b/knora/dsplib/models/sipi.py index c8a25b717..3cb026a4d 100644 --- a/knora/dsplib/models/sipi.py +++ b/knora/dsplib/models/sipi.py @@ -1,7 +1,6 @@ import os - import requests - +from typing import Any from .helpers import BaseError @@ -30,7 +29,7 @@ def __init__(self, sipi_server: str, token: str): self.sipi_server = sipi_server self.token = token - def upload_bitstream(self, filepath): + def upload_bitstream(self, filepath: str) -> dict[Any, Any]: """ Uploads a bitstream to the Sipi server @@ -45,5 +44,5 @@ def upload_bitstream(self, filepath): req = requests.post(self.sipi_server + "/upload?token=" + self.token, files=files) on_api_error(req) print(f'Uploaded file {filepath}') - res = req.json() + res: dict[Any, Any] = req.json() return res diff --git a/knora/dsplib/models/value.py b/knora/dsplib/models/value.py index c3af54204..bf98020c7 100644 --- a/knora/dsplib/models/value.py +++ b/knora/dsplib/models/value.py @@ -16,21 +16,21 @@ class KnoraStandoffXml: __iriregexp = re.compile(r'IRI:[^:]*:IRI') __xmlstr: str - def __init__(self, xmlstr: str) -> str: + def __init__(self, xmlstr: str) -> None: self.__xmlstr = str(xmlstr) def __str__(self) -> str: return self.__xmlstr - def getXml(self) -> str: - return self.__xmlstr - - def findall(self) -> Union[list[str], None]: + def get_all_iris(self) -> Optional[list[str]]: return self.__iriregexp.findall(self.__xmlstr) def replace(self, fromStr: str, toStr: str) -> None: self.__xmlstr = self.__xmlstr.replace(fromStr, toStr) + def regex_replace(self, pattern: str, repl: str) -> None: + self.__xmlstr = re.sub(pattern=repr(pattern)[1:-1], repl=repl, string=self.__xmlstr) + @strict class Value: diff --git a/knora/dsplib/utils/onto_validate.py b/knora/dsplib/utils/onto_validate.py index 6367a2ffb..5b97046ce 100644 --- a/knora/dsplib/utils/onto_validate.py +++ b/knora/dsplib/utils/onto_validate.py @@ -1,14 +1,13 @@ import json import os -from typing import Dict, Union - +import re +from typing import Any, Union, List, Set import jsonschema -from jsonschema import validate - +import jsonpath_ng, jsonpath_ng.ext from ..utils.expand_all_lists import expand_lists_from_excel -def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool: +def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLike[Any]']) -> bool: """ Validates an ontology against the knora schema @@ -18,8 +17,8 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool Returns: True if ontology passed validation, False otherwise """ - data_model = '' + data_model: dict[Any, Any] = {} if isinstance(input_file_or_json, dict): data_model = input_file_or_json elif os.path.isfile(input_file_or_json): @@ -38,15 +37,138 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool # validate the data model against the schema current_dir = os.path.dirname(os.path.realpath(__file__)) - with open(os.path.join(current_dir, '../schemas/ontology.json')) as s: schema = json.load(s) - try: - validate(instance=data_model, schema=schema) + jsonschema.validate(instance=data_model, schema=schema) except jsonschema.exceptions.ValidationError as err: print(f'Data model did not pass validation. The error message is: {err.message}\n' f'The error occurred at {err.json_path}') return False - print('Data model is syntactically correct and passed validation.') - return True + + # cardinalities check for circular references + if check_cardinalities_of_circular_references(data_model): + print('Data model is syntactically correct and passed validation.') + return True + else: + return False + + +def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool: + """ + Check if there are properties derived from hasLinkTo that form a circular reference. If so, these + properties must have the cardinality 0-1 or 0-n, because during the xmlupload process, these values + are temporarily removed. + """ + + # search the ontology for all properties that are derived from hasLinkTo, store them in a dict, and map + # them to their objects (i.e. the resource classes they point to) + # example: if the property 'rosetta:hasTextMedium' points to 'rosetta:Image2D': + # link_properties = {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...} + ontos = data_model['project']['ontologies'] + link_properties: dict[str, List[str]] = dict() + for index, onto in enumerate(ontos): + hasLinkTo_matches = jsonpath_ng.ext.parse( + f'$.project.ontologies[{index}].properties[?@.super[*] == hasLinkTo]' + ).find(data_model) + prop_obj_pair: dict[str, List[str]] = dict() + for match in hasLinkTo_matches: + prop = onto['name'] + ':' + match.value['name'] + target = match.value['object'] + if target != 'Resource': + # make the target a fully qualified name (with the ontology's name prefixed) + target = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', target) + prop_obj_pair[prop] = [target] + link_properties.update(prop_obj_pair) + + # in case the object of a property is "Resource", the link can point to any resource class + all_res_names: List[str] = list() + for index, onto in enumerate(ontos): + matches = jsonpath_ng.ext.parse(f'$.resources[*].name').find(onto) + tmp = [f'{onto["name"]}:{match.value}' for match in matches] + all_res_names.extend(tmp) + for prop, targ in link_properties.items(): + if 'Resource' in targ: + link_properties[prop] = all_res_names + + # make a dict that maps resource classes to their hasLinkTo-properties, and to the classes they point to + # example: if 'rosetta:Text' has the property 'rosetta:hasTextMedium' that points to 'rosetta:Image2D': + # dependencies = {'rosetta:Text': {'rosetta:hasTextMedium': ['rosetta:Image2D'], ...}} + dependencies: dict[str, dict[str, List[str]]] = dict() + for onto in ontos: + for resource in onto['resources']: + resname: str = onto['name'] + ':' + resource['name'] + for card in resource['cardinalities']: + # make the cardinality a fully qualified name (with the ontology's name prefixed) + cardname = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', card['propname']) + if cardname in link_properties: + # Look out: if `targets` is created with `targets = link_properties[cardname]`, the ex- + # pression `dependencies[resname][cardname] = targets` causes `dependencies[resname][cardname]` + # to point to `link_properties[cardname]`. Due to that, the expression + # `dependencies[resname][cardname].extend(targets)` will modify 'link_properties'! + # For this reason, `targets` must be created with `targets = list(link_properties[cardname])` + targets = list(link_properties[cardname]) + if resname not in dependencies: + dependencies[resname] = dict() + dependencies[resname][cardname] = targets + elif cardname not in dependencies[resname]: + dependencies[resname][cardname] = targets + else: + dependencies[resname][cardname].extend(targets) + + # iteratively purge dependencies from non-circular references + for _ in range(30): + # remove targets that point to a resource that is not in dependencies, + # remove cardinalities that have no targets + for res, cards in dependencies.copy().items(): + for card, targets in cards.copy().items(): + dependencies[res][card] = [target for target in targets if target in dependencies] + if len(dependencies[res][card]) == 0: + del dependencies[res][card] + # remove resources that have no cardinalities + dependencies = {res: cards for res, cards in dependencies.items() if len(cards) > 0} + # remove resources that are not pointed to by any target + all_targets: Set[str] = set() + for cards in dependencies.values(): + for trgt in cards.values(): + all_targets = all_targets | set(trgt) + dependencies = {res: targets for res, targets in dependencies.items() if res in all_targets} + + # check the remaining dependencies (which are only the circular ones) if they have all 0-1 or 0-n + ok_cardinalities = ['0-1', '0-n'] + notok_dependencies: dict[str, List[str]] = dict() + for res, cards in dependencies.items(): + ontoname, resname = res.split(':') + for card in cards: + # the name of the cardinality could be with prepended onto, only with colon, or without anything + card_without_colon = card.split(':')[1] + card_with_colon = ':' + card_without_colon + card_variations = [card, card_with_colon, card_without_colon] + for card_variation in card_variations: + match = jsonpath_ng.ext.parse( + f'$[?@.name == {ontoname}].resources[?@.name == {resname}].cardinalities[?@.propname == "{card_variation}"]' + ).find(ontos) + if len(match) > 0: + break + card_numbers = match[0].value['cardinality'] + if card_numbers not in ok_cardinalities: + if res not in notok_dependencies: + notok_dependencies[res] = [card] + else: + notok_dependencies[res].append(card) + + if len(notok_dependencies) == 0: + return True + else: + print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references ' + 'between resources. This is not a problem in itself, but if you try to upload data that actually ' + 'contains circular references, these "hasLinkTo" cardinalities will be temporarily removed from the ' + 'affected resources. Therefore, it is necessary that the involved "hasLinkTo" cardinalities have a ' + 'cardinality of 0-1 or 0-n. \n' + 'Please make sure that the following cardinalities have a cardinality of 0-1 or 0-n:') + for _res, _cards in notok_dependencies.items(): + print(_res) + for card in _cards: + print(f'\t{card}') + return False + diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py index 5ae0f947e..708770046 100644 --- a/knora/dsplib/utils/xml_upload.py +++ b/knora/dsplib/utils/xml_upload.py @@ -8,7 +8,8 @@ import uuid from datetime import datetime from pathlib import Path -from typing import Optional, Union +from typing import Optional, Union, cast, Tuple +from urllib.parse import quote_plus from lxml import etree @@ -17,7 +18,7 @@ from knora.dsplib.models.helpers import BaseError from knora.dsplib.models.permission import Permissions from knora.dsplib.models.project import Project -from knora.dsplib.models.resource import ResourceInstanceFactory, ResourceInstance +from knora.dsplib.models.resource import ResourceInstanceFactory, ResourceInstance, KnoraStandoffXmlEncoder from knora.dsplib.models.sipi import Sipi from knora.dsplib.models.value import KnoraStandoffXml @@ -29,7 +30,7 @@ class XmlError(BaseException): def __init__(self, msg: str): self._message = msg - def __str__(self): + def __str__(self) -> str: return 'XML-ERROR: ' + self._message @@ -122,7 +123,7 @@ def __init__(self, node: etree.Element, val_type: str, listname: Optional[str] = xmlstr = xmlstr.replace('', '') xmlstr = xmlstr.replace('', '') self._value = KnoraStandoffXml(xmlstr) - tmp_id_list = self._value.findall() + tmp_id_list = self._value.get_all_iris() if tmp_id_list: refs = set() for tmp_id in tmp_id_list: @@ -139,11 +140,19 @@ def value(self) -> Union[str, KnoraStandoffXml]: """The actual value of the value instance""" return self._value + @value.setter + def value(self, value: Union[str, KnoraStandoffXml]) -> None: + self._value = value + @property def resrefs(self) -> Optional[list[str]]: """List of resource references""" return self._resrefs + @resrefs.setter + def resrefs(self, resrefs: Optional[list[str]]) -> None: + self._resrefs = resrefs + @property def comment(self) -> str: """Comment about the value""" @@ -313,6 +322,14 @@ def bitstream(self) -> Optional[XMLBitstream]: """The bitstream object belonging to the resource""" return self._bitstream + @property + def properties(self) -> list[XMLProperty]: + return self._properties + + @properties.setter + def properties(self, new_properties: list[XMLProperty]) -> None: + self._properties = new_properties + def print(self) -> None: """Prints the resource and its attributes.""" print(f'Resource: id={self._id}, restype: {self._restype}, label: {self._label}') @@ -321,6 +338,22 @@ def print(self) -> None: for prop in self._properties: prop.print() + def get_props_with_links(self) -> list[XMLProperty]: + """ + Get a list of all XMLProperties that have an outgoing link to another resource, be it a resptr-prop link + or a standoff link in a text. + """ + link_properties: list[XMLProperty] = [] + for prop in self._properties: + if prop.valtype == 'resptr': + link_properties.append(prop) + elif prop.valtype == 'text': + for value in prop.values: + if value.resrefs: + link_properties.append(prop) + break + return link_properties + def get_resptrs(self) -> list[str]: """ Get a list of all resource id's that are referenced by this resource @@ -332,16 +365,20 @@ def get_resptrs(self) -> list[str]: for prop in self._properties: if prop.valtype == 'resptr': for value in prop.values: - resptrs.append(value.value) + resptrs.append(str(value.value)) elif prop.valtype == 'text': for value in prop.values: - if value.resrefs is not None: + if value.resrefs: resptrs.extend(value.resrefs) return resptrs - def get_propvals(self, resiri_lookup: dict[str, str], permissions_lookup: dict[str, Permissions]) -> dict[str, Permissions]: + def get_propvals( + self, + resiri_lookup: dict[str, str], + permissions_lookup: dict[str, Permissions] + ) -> dict[str, Union[list[Union[str, dict[str, str]]], str, dict[str, str]]]: """ - Get a dictionary of the property names and their values belonging to a resource + Get a dictionary of the property names and their values. Replace the internal ids by their IRI first. Args: resiri_lookup: Is used to solve internal unique id's of resources to real IRI's @@ -363,7 +400,7 @@ def get_propvals(self, resiri_lookup: dict[str, str], permissions_lookup: dict[s v = value.value # if we do not find the id, we assume it's a valid knora IRI elif prop.valtype == 'text': if isinstance(value.value, KnoraStandoffXml): - iri_refs = value.value.findall() + iri_refs = value.value.get_all_iris() for iri_ref in iri_refs: res_id = iri_ref.split(':')[1] iri = resiri_lookup.get(res_id) @@ -495,37 +532,44 @@ def get_permission_instance(self) -> Permissions: permissions.add(allow.permission, allow.group) return permissions - def __str__(self): + def __str__(self) -> str: allow_str: list[str] = [] for allow in self._allows: allow_str.append("{} {}".format(allow.permission, allow.group)) return '|'.join(allow_str) - def print(self): + def print(self) -> None: """Prints the permission set""" print('Permission: ', self._id) for a in self._allows: a.print() -def do_sort_order(resources: list[XMLResource], verbose) -> list[XMLResource]: +def remove_circular_references(resources: list[XMLResource], verbose: bool) -> \ + tuple[list[XMLResource], + dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]], + dict[XMLResource, dict[XMLProperty, list[str]]] + ]: """ - Sorts a list of resources. - - Resources that reference other resources are added after the referenced resources. The method will report circular - references and exit with an error if there are any unresolvable references. + Temporarily removes problematic resource-references from a list of resources. A reference is problematic if + it creates a circle (circular references). Args: - resources: list of resources to sort + resources: list of resources that possibly contain circular references verbose: verbose output if True Returns: - sorted list of resources + list: list of cleaned resources + stashed_xml_texts: dict with the stashed XML texts + stashed_resptr_props: dict with the stashed resptr-props """ if verbose: print("Checking resources for unresolvable references...") + stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]] = {} + stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {} + # sort the resources according to outgoing resptrs ok_resources: list[XMLResource] = [] nok_resources: list[XMLResource] = [] @@ -550,20 +594,77 @@ def do_sort_order(resources: list[XMLResource], verbose) -> list[XMLResource]: nok_resources.append(resource) resources = nok_resources if len(nok_resources) == nok_len: - print("ERROR Unable to resolve all resptr dependencies.") - for res in nok_resources: - unresolvable_resptrs = [] - for resptr_id in res.get_resptrs(): - if resptr_id not in ok_res_ids: - unresolvable_resptrs.append(resptr_id) - print(f"\tResource '{res.id}' has unresolvable resptrs to {unresolvable_resptrs}") - exit(1) + # there are circular references. go through all problematic resources, and stash the problematic references. + nok_resources, ok_res_ids, ok_resources, stashed_xml_texts, stashed_resptr_props = stash_circular_references( + nok_resources, + ok_res_ids, + ok_resources, + stashed_xml_texts, + stashed_resptr_props + ) nok_len = len(nok_resources) nok_resources = [] cnt += 1 if verbose: - print(f'{cnt}. ordering finished.') - return ok_resources + print(f'{cnt}. ordering pass finished.') + return ok_resources, stashed_xml_texts, stashed_resptr_props + + +def stash_circular_references( + nok_resources: list[XMLResource], + ok_res_ids: list[str], + ok_resources: list[XMLResource], + stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]], + stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] +) -> Tuple[ + list[XMLResource], + list[str], + list[XMLResource], + dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]], + dict[XMLResource, dict[XMLProperty, list[str]]] +]: + for res in nok_resources.copy(): + for link_prop in res.get_props_with_links(): + if link_prop.valtype == 'text': + for value in link_prop.values: + if value.resrefs and not all([_id in ok_res_ids for _id in value.resrefs]): + # stash this XML text, replace it by its hash, and remove the + # problematic resrefs from the XMLValue's resrefs list + value_hash = str(hash(f'{value.value}{datetime.now()}')) + if res not in stashed_xml_texts: + stashed_xml_texts[res] = {link_prop: {value_hash: cast(KnoraStandoffXml, value.value)}} + elif link_prop not in stashed_xml_texts[res]: + stashed_xml_texts[res][link_prop] = {value_hash: cast(KnoraStandoffXml, value.value)} + else: + stashed_xml_texts[res][link_prop][value_hash] = cast(KnoraStandoffXml, value.value) + value.value = KnoraStandoffXml(value_hash) + value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids] + elif link_prop.valtype == 'resptr': + for value in link_prop.values.copy(): + if value.value not in ok_res_ids: + # value.value is the id of the target resource. stash it, then delete it + if res not in stashed_resptr_props: + stashed_resptr_props[res] = {} + stashed_resptr_props[res][link_prop] = [str(value.value)] + else: + if link_prop not in stashed_resptr_props[res]: + stashed_resptr_props[res][link_prop] = [str(value.value)] + else: + stashed_resptr_props[res][link_prop].append(str(value.value)) + link_prop.values.remove(value) + else: + raise BaseError(f'ERROR in remove_circular_references(): link_prop.valtype is ' + f'neither text nor resptr.') + + if len(link_prop.values) == 0: + # if all values of a link property have been stashed, the property needs to be removed + res.properties.remove(link_prop) + + ok_resources.append(res) + ok_res_ids.append(res.id) + nok_resources.remove(res) + + return nok_resources, ok_res_ids, ok_resources, stashed_xml_texts, stashed_resptr_props def validate_xml_against_schema(input_file: str, schema_file: str) -> bool: @@ -586,6 +687,7 @@ def validate_xml_against_schema(input_file: str, schema_file: str) -> bool: print("The input data file cannot be uploaded due to the following validation error(s):") for error in xmlschema.error_log: print(f" Line {error.line}: {error.message}") + return False def convert_ark_v0_to_resource_iri(ark: str) -> str: @@ -628,6 +730,83 @@ def convert_ark_v0_to_resource_iri(ark: str) -> str: return "http://rdfh.ch/" + project_id + "/" + dsp_uuid +def update_xml_texts( + resource: XMLResource, + res_iri: str, + link_props: dict[XMLProperty, dict[str, KnoraStandoffXml]], + res_iri_lookup: dict[str, str], + con: Connection, + verbose: bool +) -> None: + existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}') + context = existing_resource['@context'] + for link_prop, hash_to_value in link_props.items(): + values = existing_resource[link_prop.name] + if not isinstance(values, list): + values = [values, ] + for value in values: + xmltext = value.get("knora-api:textValueAsXml") + if xmltext: + _hash = re.sub(r'<\?xml.+>(\n)?()(.+)(<\/text>)', r'\3', xmltext) + if _hash in hash_to_value: + new_xmltext = hash_to_value[_hash] + for _id, _iri in res_iri_lookup.items(): + new_xmltext.regex_replace(f'href="IRI:{_id}:IRI"', f'href="{_iri}"') + val_iri = value['@id'] + jsonobj = { + "@id": res_iri, + "@type": resource.restype, + link_prop.name: { + "@id": val_iri, + "@type": "knora-api:TextValue", + "knora-api:textValueAsXml": new_xmltext, + "knora-api:textValueHasMapping": { + '@id': 'http://rdfh.ch/standoff/mappings/StandardMapping' + } + }, + "@context": context + } + jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder) + new_value = con.put(path='/v2/values', jsondata=jsondata) + if not new_value: + print(f'ERROR while updating the xml text of {link_prop.name} of resource {resource.id}') + elif verbose: + print(f' Successfully updated Property: {link_prop.name} Type: XML Text\n' + f' Value: {new_xmltext}') + + +def update_resptr_props( + resource: XMLResource, + res_iri: str, + prop_2_resptrs: dict[XMLProperty, list[str]], + res_iri_lookup: dict[str, str], + con: Connection, + verbose: bool +) -> None: + existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}') + context = existing_resource['@context'] + for link_prop, resptrs in prop_2_resptrs.items(): + for resptr in resptrs: + jsonobj = { + '@id': res_iri, + '@type': resource.restype, + f'{link_prop.name}Value': { + '@type': 'knora-api:LinkValue', + 'knora-api:linkValueHasTargetIri': { + '@id': res_iri_lookup[resptr] + } + }, + '@context': context + } + jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': ')) + new_value = con.post(path='/v2/values', jsondata=jsondata) + if not new_value: + print(f'ERROR while updating the resptr prop of {link_prop.name} of resource {resource.id}') + elif verbose: + print(f' Successfully updated Property: {link_prop.name} Type: Link property\n' + f' Value: {resptr}') + + def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: str, sipi: str, verbose: bool, validate_only: bool, incremental: bool) -> None: """ @@ -698,11 +877,11 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s elif child.tag == "resource": resources.append(XMLResource(child, default_ontology)) - # sort the resources (resources which do not link to others come first) but only if not an incremental upload + # temporarily remove circular references, but only if not an incremental upload if not incremental: - resources = do_sort_order(resources, verbose) + resources, stashed_xml_texts, stashed_resptr_props = remove_circular_references(resources, verbose) - sipi = Sipi(sipi, con.get_token()) + sipi_server = Sipi(sipi, con.get_token()) # get the project information and project ontology from the server project = ResourceInstanceFactory(con, shortcode) @@ -713,9 +892,9 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s permissions_lookup[key] = perm.get_permission_instance() # create a dictionary to look up resource classes - res_classes: dict[str, type] = {} + resclass_name_2_type: dict[str, type] = {} for res_class_name in project.get_resclass_names(): - res_classes[res_class_name] = project.get_resclass(res_class_name) + resclass_name_2_type[res_class_name] = project.get_resclass_type(res_class_name) res_iri_lookup: dict[str, str] = {} @@ -730,7 +909,7 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s resource_bitstream = None if resource.bitstream: - img = sipi.upload_bitstream(os.path.join(imgdir, resource.bitstream.value)) + img = sipi_server.upload_bitstream(os.path.join(imgdir, resource.bitstream.value)) internal_file_name_bitstream = img['uploadedFiles'][0]['internalFilename'] resource_bitstream = resource.get_bitstream(internal_file_name_bitstream, permissions_lookup) @@ -738,27 +917,74 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s try: # create a resource instance (ResourceInstance) from the given resource in the XML (XMLResource) - instance: ResourceInstance = res_classes[resource.restype](con=con, - label=resource.label, - iri=resource_iri, - permissions=permissions_tmp, - bitstream=resource_bitstream, - values=resource.get_propvals(res_iri_lookup, - permissions_lookup)).create() + resclass_type = resclass_name_2_type[resource.restype] + properties = resource.get_propvals(res_iri_lookup, permissions_lookup) + resclass_instance: ResourceInstance = resclass_type( + con=con, + label=resource.label, + iri=resource_iri, + permissions=permissions_tmp, + bitstream=resource_bitstream, + values=properties + ) + resclass_instance = resclass_instance.create() except BaseError as err: - print( - f"ERROR while trying to create resource '{resource.label}' ({resource.id}). The error message was: {err.message}") + print(f"ERROR while trying to create resource '{resource.label}' ({resource.id}). " + f"The error message was: {err.message}") failed_uploads.append(resource.id) continue - except Exception as exception: - print( - f"ERROR while trying to create resource '{resource.label}' ({resource.id}). The error message was: {exception}") + print(f"EXCEPTION while trying to create resource '{resource.label}' ({resource.id}). " + f"The exception message was: {exception}") failed_uploads.append(resource.id) continue - res_iri_lookup[resource.id] = instance.iri - print(f"Created resource '{instance.label}' ({resource.id}) with IRI '{instance.iri}'") + res_iri_lookup[resource.id] = resclass_instance.iri + print(f"Created resource '{resclass_instance.label}' ({resource.id}) with IRI '{resclass_instance.iri}'") + + # update the resources with the stashed XML texts + if len(stashed_xml_texts) > 0: + print('Update the stashed XML texts...') + for resource, link_props in stashed_xml_texts.items(): + print(f'Update XML text(s) of resource "{resource.id}"...') + res_iri = res_iri_lookup[resource.id] + try: + update_xml_texts( + resource=resource, + res_iri=res_iri, + link_props=link_props, + res_iri_lookup=res_iri_lookup, + con=con, + verbose=verbose + ) + except BaseError as err: + print(f'BaseError while updating an XML text of resource "{resource.id}": {err.message}') + continue + except Exception as exception: + print(f'Exception while updating an XML text of resource "{resource.id}": {exception}') + continue + + # update the resources with the stashed resptrs + if len(stashed_resptr_props) > 0: + print('Update the stashed resptrs...') + for resource, prop_2_resptrs in stashed_resptr_props.items(): + print(f'Update resptrs of resource "{resource.id}"...') + res_iri = res_iri_lookup[resource.id] + try: + update_resptr_props( + resource=resource, + res_iri=res_iri, + prop_2_resptrs=prop_2_resptrs, + res_iri_lookup=res_iri_lookup, + con=con, + verbose=verbose + ) + except BaseError as err: + print(f'BaseError while updating an XML text of resource "{resource.id}": {err.message}') + continue + except Exception as exception: + print(f'Exception while updating an XML text of resource "{resource.id}": {exception}') + continue # write mapping of internal IDs to IRIs to file with timestamp timestamp_now = datetime.now() diff --git a/test/e2e/test_resource.py b/test/e2e/test_resource.py index 8202e1544..eccb747fa 100644 --- a/test/e2e/test_resource.py +++ b/test/e2e/test_resource.py @@ -24,7 +24,7 @@ def test_Resource_create(self) -> None: factory = ResourceInstanceFactory(self.con, 'anything') # get a blue_thing resource class - blue_thing = factory.get_resclass('anything:BlueThing') + blue_thing = factory.get_resclass_type('anything:BlueThing') a_blue_thing = blue_thing(con=self.con, label='BlueThing', @@ -50,7 +50,7 @@ def test_Resource_create(self) -> None: self.assertEqual(new_blue_thing.value("anything:hasDecimal"), 3.14159) self.assertEqual(new_blue_thing.value("anything:hasText"), "Dies ist ein einfacher Text") - thing_picture = factory.get_resclass('anything:ThingPicture') + thing_picture = factory.get_resclass_type('anything:ThingPicture') sipi = Sipi('http://0.0.0.0:1024', self.con.get_token()) img = sipi.upload_bitstream('testdata/bitstreams/TEMP11.TIF') file_ref = img['uploadedFiles'][0]['internalFilename'] diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py index 46e397add..ad8a2e5c8 100644 --- a/test/e2e/test_tools.py +++ b/test/e2e/test_tools.py @@ -172,7 +172,7 @@ def test_xml_upload(self) -> None: server=self.server, user=self.user, password='test', - imgdir='testdata/bitstreams', + imgdir='.', sipi='http://0.0.0.0:1024', verbose=False, validate_only=False, diff --git a/testdata/test-data.xml b/testdata/test-data.xml index 068520ff5..fc65bc42c 100644 --- a/testdata/test-data.xml +++ b/testdata/test-data.xml @@ -36,6 +36,9 @@ Dies ist ein TestThing ohne Angabe von permissions + + obj_0001 + Nochmals ein einfacher Text - This isbold andstringtext! + + This isbold andstringtext! It contains links to all resources: + obj_0000 + obj_0001 + obj_0002 + obj_0003 + obj_0004 + obj_0005 + obj_0006 + obj_0007 + obj_0008 + obj_0009 + obj_0010 + obj_0011 + https://dasch.swiss @@ -95,11 +112,41 @@ + + + Dies ist ein einfacher Text ohne Markup + Nochmals ein einfacher Text + + + + This isbold andstringtext! It contains links to all resources: + obj_0000 + obj_0001 + obj_0002 + obj_0003 + obj_0004 + obj_0005 + obj_0006 + obj_0007 + obj_0008 + obj_0009 + obj_0010 + obj_0011 + + + + false + + + - TEMP11.TIF + testdata/bitstreams/TEMP11.TIF This is a Imagething @@ -118,7 +165,7 @@ restype=":PartOfThing" id="obj_0004" permissions="res-default"> - TEMP12.TIF + testdata/bitstreams/TEMP12.TIF obj_0003 @@ -131,7 +178,7 @@ restype=":PartOfThing" id="obj_0005" permissions="res-default"> - TEMP13.TIF + testdata/bitstreams/TEMP13.TIF obj_0003 @@ -144,7 +191,7 @@ restype=":PartOfThing" id="obj_0006" permissions="res-default"> - TEMP14.TIF + testdata/bitstreams/TEMP14.TIF obj_0003 @@ -157,7 +204,7 @@ restype=":DocumentThing" id="obj_0007" permissions="res-default"> - test.pdf + testdata/bitstreams/test.pdf This is a Documentthing @@ -167,7 +214,7 @@ restype=":TextThing" id="obj_0008" permissions="res-default"> - test.csv + testdata/bitstreams/test.csv This is a Textthing as CSV @@ -177,7 +224,7 @@ restype=":ZipThing" id="obj_0009" permissions="res-default"> - test.zip + testdata/bitstreams/test.zip This is a Zipthing @@ -187,19 +234,19 @@ restype=":AudioThing" id="obj_0010" permissions="res-default"> - clara.wav + testdata/bitstreams/clara.wav This is a Audiothing - + Some text - + Some text diff --git a/testdata/test-onto.json b/testdata/test-onto.json index 1f722e37f..ae30e90fd 100644 --- a/testdata/test-onto.json +++ b/testdata/test-onto.json @@ -366,6 +366,17 @@ "en": "Another thing" }, "gui_element": "Searchbox" + }, + { + "name": "hasTestThing", + "super": [ + "hasLinkTo" + ], + "object": ":TestThing", + "labels": { + "en": "Another thing" + }, + "gui_element": "Searchbox" } ], "resources": [ @@ -487,6 +498,11 @@ "propname": ":hasText", "gui_order": 1, "cardinality": "1" + }, + { + "propname": ":hasTestThing", + "gui_order": 2, + "cardinality": "0-n" } ] },