diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py
index fa512a962..bc3c8b688 100644
--- a/knora/dsplib/utils/xml_upload.py
+++ b/knora/dsplib/utils/xml_upload.py
@@ -5,13 +5,14 @@
import json
import os
import re
+import time
import uuid
from datetime import datetime
from pathlib import Path
-from typing import Optional, Union, cast, Tuple
+from typing import Optional, Union, cast, Tuple, Any, Callable
from urllib.parse import quote_plus
-
from lxml import etree
+from requests import RequestException
from knora.dsplib.models.connection import Connection
from knora.dsplib.models.group import Group
@@ -187,7 +188,7 @@ class XMLProperty:
def __init__(self, node: etree.Element, valtype: str, default_ontology: Optional[str] = None):
"""
- The constructor for the knora property
+ The constructor for the DSP property
Args:
node: the property node, p.ex.
@@ -397,13 +398,16 @@ def get_propvals(
if iri:
v = iri
else:
- v = value.value # if we do not find the id, we assume it's a valid knora IRI
+ v = value.value # if we do not find the id, we assume it's a valid DSP IRI
elif prop.valtype == 'text':
if isinstance(value.value, KnoraStandoffXml):
iri_refs = value.value.get_all_iris()
for iri_ref in iri_refs:
res_id = iri_ref.split(':')[1]
iri = resiri_lookup.get(res_id)
+ if not iri:
+ raise BaseError(f'Resource cannot be created, because it contains a salsah-Link to '
+ f'the following invalid resource: {res_id}.')
value.value.replace(iri_ref, iri)
v = value.value
else:
@@ -700,7 +704,7 @@ def convert_ark_v0_to_resource_iri(ark: str) -> str:
migration.
Args:
- ark : an ARK version 0 of the form ark:/72163/080c-779b9990a0c3f-6e, '72163' being the Name Assigning Authority
+ ark: an ARK version 0 of the form ark:/72163/080c-779b9990a0c3f-6e, '72163' being the Name Assigning Authority
number, '080c' being the project shortcode, '779b9990a0c3f' being an ID derived from the object's Salsah ID and
'6e' being check digits
@@ -730,107 +734,30 @@ def convert_ark_v0_to_resource_iri(ark: str) -> str:
return "http://rdfh.ch/" + project_id + "/" + dsp_uuid
-def update_xml_texts(
- resource: XMLResource,
- res_iri: str,
- link_props: dict[XMLProperty, dict[str, KnoraStandoffXml]],
- res_iri_lookup: dict[str, str],
- con: Connection,
- verbose: bool
-) -> None:
- existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}')
- context = existing_resource['@context']
- for link_prop, hash_to_value in link_props.items():
- values = existing_resource[link_prop.name]
- if not isinstance(values, list):
- values = [values, ]
- for value in values:
- xmltext = value.get("knora-api:textValueAsXml")
- if xmltext:
- _hash = re.sub(r'<\?xml.+>(\n)?()(.+)(<\/text>)', r'\3', xmltext)
- if _hash in hash_to_value:
- new_xmltext = hash_to_value[_hash]
- for _id, _iri in res_iri_lookup.items():
- new_xmltext.regex_replace(f'href="IRI:{_id}:IRI"', f'href="{_iri}"')
- val_iri = value['@id']
- jsonobj = {
- "@id": res_iri,
- "@type": resource.restype,
- link_prop.name: {
- "@id": val_iri,
- "@type": "knora-api:TextValue",
- "knora-api:textValueAsXml": new_xmltext,
- "knora-api:textValueHasMapping": {
- '@id': 'http://rdfh.ch/standoff/mappings/StandardMapping'
- }
- },
- "@context": context
- }
- jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
- new_value = con.put(path='/v2/values', jsondata=jsondata)
- if not new_value:
- print(f'ERROR while updating the xml text of {link_prop.name} of resource {resource.id}')
- elif verbose:
- print(f' Successfully updated Property: {link_prop.name} Type: XML Text\n'
- f' Value: {new_xmltext}')
-
-
-def update_resptr_props(
- resource: XMLResource,
- res_iri: str,
- prop_2_resptrs: dict[XMLProperty, list[str]],
- res_iri_lookup: dict[str, str],
- con: Connection,
- verbose: bool
-) -> None:
- existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}')
- context = existing_resource['@context']
- for link_prop, resptrs in prop_2_resptrs.items():
- for resptr in resptrs:
- jsonobj = {
- '@id': res_iri,
- '@type': resource.restype,
- f'{link_prop.name}Value': {
- '@type': 'knora-api:LinkValue',
- 'knora-api:linkValueHasTargetIri': {
- '@id': res_iri_lookup[resptr]
- }
- },
- '@context': context
- }
- jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '))
- new_value = con.post(path='/v2/values', jsondata=jsondata)
- if not new_value:
- print(f'ERROR while updating the resptr prop of {link_prop.name} of resource {resource.id}')
- elif verbose:
- print(f' Successfully updated Property: {link_prop.name} Type: Link property\n'
- f' Value: {resptr}')
-
-
def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: str, sipi: str, verbose: bool,
- validate_only: bool, incremental: bool) -> None:
+ validate_only: bool, incremental: bool) -> bool:
"""
This function reads an XML file and imports the data described in it onto the DSP server.
Args:
- input_file : the XML with the data to be imported onto the DSP server
- server : the DSP server where the data should be imported
- user : the user (e-mail) with which the data should be imported
- password : the password of the user with which the data should be imported
- imgdir : the image directory
- sipi : the sipi instance to be used
- verbose : verbose option for the command, if used more output is given to the user
- validate_only : validation option to validate the XML data without the actual import of the data
+ input_file: the XML with the data to be imported onto the DSP server
+ server: the DSP server where the data should be imported
+ user: the user (e-mail) with which the data should be imported
+ password: the password of the user with which the data should be imported
+ imgdir: the image directory
+ sipi: the sipi instance to be used
+ verbose: verbose option for the command, if used more output is given to the user
+ validate_only: validation option to validate the XML data without the actual import of the data
incremental: if set, IRIs instead of internal IDs are expected as resource pointers
Returns:
- None
+ True if all resources could be uploaded without errors; False if any resource (or part of it) could not be
+ successfully uploaded
"""
# Validate the input XML file
current_dir = os.path.dirname(os.path.realpath(__file__))
schema_file = os.path.join(current_dir, '../schemas/data.xsd')
-
if validate_xml_against_schema(input_file, schema_file):
print("The input data file is syntactically correct and passed validation.")
if validate_only:
@@ -843,40 +770,33 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
con = Connection(server)
con.login(user, password)
proj_context = ProjectContext(con=con)
+ sipi_server = Sipi(sipi, con.get_token())
- resources: list[XMLResource] = []
- permissions: dict[str, XmlPermission] = {}
-
- # parse the XML file containing the data
+ # parse the XML file
tree = etree.parse(input_file)
-
- # Iterate through all XML elements
for elem in tree.getiterator():
- # Skip comments and processing instructions,
- # because they do not have names
- if not (
- isinstance(elem, etree._Comment)
- or isinstance(elem, etree._ProcessingInstruction)
- ):
- # Remove a namespace URI in the element's name
- elem.tag = etree.QName(elem).localname
-
- # Remove unused namespace declarations
- etree.cleanup_namespaces(tree)
-
- knora = tree.getroot()
- default_ontology = knora.attrib['default-ontology']
- shortcode = knora.attrib['shortcode']
-
- for child in knora:
- # get all permissions
+ if not (isinstance(elem, etree._Comment) or isinstance(elem, etree._ProcessingInstruction)):
+ elem.tag = etree.QName(elem).localname # remove namespace URI in the element's name
+ etree.cleanup_namespaces(tree) # remove unused namespace declarations
+
+ root = tree.getroot()
+ default_ontology = root.attrib['default-ontology']
+ shortcode = root.attrib['shortcode']
+
+ resources: list[XMLResource] = []
+ permissions: dict[str, XmlPermission] = {}
+ for child in root:
if child.tag == "permissions":
permission = XmlPermission(child, proj_context)
permissions[permission.id] = permission
- # get all resources
elif child.tag == "resource":
resources.append(XMLResource(child, default_ontology))
+ # get the project information and project ontology from the server
+ project = ResourceInstanceFactory(con, shortcode)
+ permissions_lookup: dict[str, Permissions] = {s: perm.get_permission_instance() for s, perm in permissions.items()}
+ resclass_name_2_type: dict[str, type] = {s: project.get_resclass_type(s) for s in project.get_resclass_names()}
+
# temporarily remove circular references, but only if not an incremental upload
if not incremental:
resources, stashed_xml_texts, stashed_resptr_props = remove_circular_references(resources, verbose)
@@ -884,24 +804,77 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
stashed_xml_texts = dict()
stashed_resptr_props = dict()
- sipi_server = Sipi(sipi, con.get_token())
+ id2iri_mapping: dict[str, str] = {}
+ failed_uploads: list[str] = []
- # get the project information and project ontology from the server
- project = ResourceInstanceFactory(con, shortcode)
+ try:
+ id2iri_mapping, failed_uploads = upload_resources(verbose, resources, imgdir, sipi_server, permissions_lookup,
+ resclass_name_2_type, id2iri_mapping, con, failed_uploads)
+ except BaseException as err:
+ handle_upload_error(err, input_file, id2iri_mapping, failed_uploads, stashed_xml_texts, stashed_resptr_props)
- # create a dictionary to look up permissions
- permissions_lookup: dict[str, Permissions] = {}
- for key, perm in permissions.items():
- permissions_lookup[key] = perm.get_permission_instance()
+ # update the resources with the stashed XML texts
+ nonapplied_xml_texts = {}
+ if len(stashed_xml_texts) > 0:
+ try:
+ nonapplied_xml_texts = upload_stashed_xml_texts(verbose, id2iri_mapping, con, stashed_xml_texts)
+ except BaseException as err:
+ handle_upload_error(err, input_file, id2iri_mapping, failed_uploads, stashed_xml_texts, stashed_resptr_props)
+
+ # update the resources with the stashed resptrs
+ nonapplied_resptr_props = {}
+ if len(stashed_resptr_props) > 0:
+ try:
+ nonapplied_resptr_props = upload_stashed_resptr_props(verbose, id2iri_mapping, con, stashed_resptr_props)
+ except BaseException as err:
+ handle_upload_error(err, input_file, id2iri_mapping, failed_uploads, stashed_xml_texts, stashed_resptr_props)
+
+ # write log files
+ success = True
+ timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
+ write_id2iri_mapping(input_file, id2iri_mapping, timestamp_str)
+ if len(nonapplied_xml_texts) > 0:
+ write_stashed_xml_texts(nonapplied_xml_texts, timestamp_str)
+ success = False
+ if len(nonapplied_resptr_props) > 0:
+ write_stashed_resptr_props(nonapplied_resptr_props, timestamp_str)
+ success = False
+ if failed_uploads:
+ print(f"Could not upload the following resources: {failed_uploads}")
+ success = False
- # create a dictionary to look up resource classes
- resclass_name_2_type: dict[str, type] = {}
- for res_class_name in project.get_resclass_names():
- resclass_name_2_type[res_class_name] = project.get_resclass_type(res_class_name)
+ return success
- res_iri_lookup: dict[str, str] = {}
- failed_uploads = []
+def upload_resources(
+ verbose: bool,
+ resources: list[XMLResource],
+ imgdir: str,
+ sipi_server: Sipi,
+ permissions_lookup: dict[str, Permissions],
+ resclass_name_2_type: dict[str, type],
+ id2iri_mapping: dict[str, str],
+ con: Connection,
+ failed_uploads: list[str]
+) -> tuple[dict[str, str], list[str]]:
+ """
+ Iterates through all resources and tries to upload them to DSP
+
+ Args:
+ verbose: if True, every resource is printed (via resource.print()) before it is uploaded
+ resources: list of XMLResources to upload to DSP
+ imgdir: folder containing the multimedia files
+ sipi_server: Sipi instance
+ permissions_lookup: maps permission strings to Permission objects
+ resclass_name_2_type: maps resource class names to their types
+ id2iri_mapping: mapping of ids from the XML file to IRIs in DSP (initially empty, gets filled during the upload)
+ con: connection to DSP
+ failed_uploads: ids of resources that could not be uploaded (initially empty, gets filled during the upload)
+
+ Returns:
+ id2iri_mapping, failed_uploads: These two arguments are modified during the upload
+ """
+
for resource in resources:
if verbose:
resource.print()
@@ -910,94 +883,435 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
if resource.ark:
resource_iri = convert_ark_v0_to_resource_iri(resource.ark)
+ # in case of a multimedia resource: upload the multimedia file
resource_bitstream = None
if resource.bitstream:
- img = sipi_server.upload_bitstream(os.path.join(imgdir, resource.bitstream.value))
+ img: Optional[dict[Any, Any]] = try_network_action(
+ object=sipi_server,
+ method='upload_bitstream',
+ kwargs={'filepath': os.path.join(imgdir, resource.bitstream.value)},
+ terminal_output_on_failure=f'ERROR while trying to create resource "{resource.label}" ({resource.id}).'
+ )
+ if not img:
+ failed_uploads.append(resource.id)
+ continue
internal_file_name_bitstream = img['uploadedFiles'][0]['internalFilename']
resource_bitstream = resource.get_bitstream(internal_file_name_bitstream, permissions_lookup)
- permissions_tmp = permissions_lookup.get(resource.permissions)
-
- try:
- # create a resource instance (ResourceInstance) from the given resource in the XML (XMLResource)
- resclass_type = resclass_name_2_type[resource.restype]
- properties = resource.get_propvals(res_iri_lookup, permissions_lookup)
- resclass_instance: ResourceInstance = resclass_type(
- con=con,
- label=resource.label,
- iri=resource_iri,
- permissions=permissions_tmp,
- bitstream=resource_bitstream,
- values=properties
- )
- resclass_instance = resclass_instance.create()
- except BaseError as err:
- print(f"ERROR while trying to create resource '{resource.label}' ({resource.id}). "
- f"The error message was: {err.message}")
+ # create the resource in DSP
+ resclass_type = resclass_name_2_type[resource.restype]
+ properties = resource.get_propvals(id2iri_mapping, permissions_lookup)
+ resclass_instance: ResourceInstance = try_network_action(
+ method=resclass_type,
+ kwargs={
+ 'con': con,
+ 'label': resource.label,
+ 'iri': resource_iri,
+ 'permissions': permissions_lookup.get(resource.permissions),
+ 'bitstream': resource_bitstream,
+ 'values': properties
+ },
+ terminal_output_on_failure=f"ERROR while trying to create resource '{resource.label}' ({resource.id})."
+ )
+ if not resclass_instance:
failed_uploads.append(resource.id)
continue
- except Exception as exception:
- print(f"EXCEPTION while trying to create resource '{resource.label}' ({resource.id}). "
- f"The exception message was: {exception}")
+
+ created_resource: ResourceInstance = try_network_action(
+ object=resclass_instance,
+ method='create',
+ terminal_output_on_failure=f"ERROR while trying to create resource '{resource.label}' ({resource.id})."
+ )
+ if not created_resource:
failed_uploads.append(resource.id)
continue
+ id2iri_mapping[resource.id] = created_resource.iri
+ print(f"Created resource '{created_resource.label}' ({resource.id}) with IRI '{created_resource.iri}'")
- res_iri_lookup[resource.id] = resclass_instance.iri
- print(f"Created resource '{resclass_instance.label}' ({resource.id}) with IRI '{resclass_instance.iri}'")
+ return id2iri_mapping, failed_uploads
- # update the resources with the stashed XML texts
- if len(stashed_xml_texts) > 0:
- print('Update the stashed XML texts...')
- for resource, link_props in stashed_xml_texts.items():
- print(f'Update XML text(s) of resource "{resource.id}"...')
- res_iri = res_iri_lookup[resource.id]
- try:
- update_xml_texts(
- resource=resource,
- res_iri=res_iri,
- link_props=link_props,
- res_iri_lookup=res_iri_lookup,
- con=con,
- verbose=verbose
- )
- except BaseError as err:
- print(f'BaseError while updating an XML text of resource "{resource.id}": {err.message}')
+
+def upload_stashed_xml_texts(
+ verbose: bool,
+ id2iri_mapping: dict[str, str],
+ con: Connection,
+ stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]
+) -> dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]:
+ """
+ After all resources are uploaded, the stashed xml texts must be applied to their resources in DSP.
+
+ Args:
+ verbose: if True, a confirmation is printed for every successfully uploaded xml text
+ id2iri_mapping: mapping of ids from the XML file to IRIs in DSP
+ con: connection to DSP
+ stashed_xml_texts: all xml texts that have been stashed
+
+ Returns:
+ nonapplied_xml_texts: the xml texts that could not be uploaded
+ """
+
+ print('Upload the stashed XML texts...')
+ for resource, link_props in stashed_xml_texts.copy().items():
+ if resource.id not in id2iri_mapping:
+ # resource could not be uploaded to DSP, so the stash cannot be uploaded either
continue
- except Exception as exception:
- print(f'Exception while updating an XML text of resource "{resource.id}": {exception}')
+ print(f' Upload XML text(s) of resource "{resource.id}"...')
+ res_iri = id2iri_mapping[resource.id]
+ existing_resource = try_network_action(
+ object=con,
+ method='get',
+ kwargs={'path': f'/v2/resources/{quote_plus(res_iri)}'},
+ terminal_output_on_failure=f'ERROR while uploading the xml texts of resource "{resource.id}"'
+ )
+ if not existing_resource:
continue
+ for link_prop, hash_to_value in link_props.items():
+ existing_values = existing_resource[link_prop.name]
+ if not isinstance(existing_values, list):
+ existing_values = [existing_values, ]
+ for existing_value in existing_values:
+ old_xmltext = existing_value.get("knora-api:textValueAsXml")
+ if not old_xmltext:
+ continue
+
+ # strip all xml tags from the old xmltext, so that the pure text itself remains
+ pure_text = re.sub(r'(<\?xml.+>\s*)?\s*(.+)\s*<\/text>', r'\2', old_xmltext)
+
+ # if the pure text is a hash, the replacement must be made. This hash originates from
+ # stash_circular_references(), and identifies the XML texts
+ if pure_text not in hash_to_value:
+ continue
+ new_xmltext = hash_to_value[pure_text]
+
+ # replace the outdated internal ids by their IRI
+ for _id, _iri in id2iri_mapping.items():
+ new_xmltext.regex_replace(f'href="IRI:{_id}:IRI"', f'href="{_iri}"')
+
+ # prepare API call
+ jsonobj = {
+ "@id": res_iri,
+ "@type": resource.restype,
+ link_prop.name: {
+ "@id": existing_value['@id'],
+ "@type": "knora-api:TextValue",
+ "knora-api:textValueAsXml": new_xmltext,
+ "knora-api:textValueHasMapping": {
+ '@id': 'http://rdfh.ch/standoff/mappings/StandardMapping'
+ }
+ },
+ "@context": existing_resource['@context']
+ }
+ jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder)
+
+ # execute API call
+ response = try_network_action(
+ object=con,
+ method='put',
+ kwargs={'path': '/v2/values', 'jsondata': jsondata},
+ terminal_output_on_failure=f'ERROR while uploading the xml text of "{link_prop.name}" '
+ f'of resource "{resource.id}"'
+ )
+ if not response:
+ continue
+ stashed_xml_texts[resource][link_prop].pop(pure_text)
+ if verbose:
+ print(f' Successfully uploaded xml text of "{link_prop.name}"\n')
+
+ # make a purged version of stashed_xml_texts, without empty entries
+ nonapplied_xml_texts = purge_stashed_xml_texts(stashed_xml_texts)
+ return nonapplied_xml_texts
+
+
+def purge_stashed_xml_texts(
+ stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]
+) -> dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]:
+ nonapplied_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]] = {}
+ for res, propdict in stashed_xml_texts.items():
+ for prop, xmldict in propdict.items():
+ if len(xmldict) > 0:
+ if res not in nonapplied_xml_texts:
+ nonapplied_xml_texts[res] = {}
+ nonapplied_xml_texts[res][prop] = xmldict
+ return nonapplied_xml_texts
+
+
+def upload_stashed_resptr_props(
+ verbose: bool,
+ id2iri_mapping: dict[str, str],
+ con: Connection,
+ stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]]
+) -> dict[XMLResource, dict[XMLProperty, list[str]]]:
+ """
+ After all resources are uploaded, the stashed resptr props must be applied to their resources in DSP.
- # update the resources with the stashed resptrs
- if len(stashed_resptr_props) > 0:
- print('Update the stashed resptrs...')
- for resource, prop_2_resptrs in stashed_resptr_props.items():
- print(f'Update resptrs of resource "{resource.id}"...')
- res_iri = res_iri_lookup[resource.id]
+ Args:
+ verbose: if True, a confirmation is printed for every successfully uploaded resptr-prop
+ id2iri_mapping: mapping of ids from the XML file to IRIs in DSP
+ con: connection to DSP
+ stashed_resptr_props: all resptr props that have been stashed
+
+ Returns:
+ nonapplied_resptr_props: the resptr props that could not be uploaded
+ """
+
+ print('Upload the stashed resptrs...')
+ for resource, prop_2_resptrs in stashed_resptr_props.copy().items():
+ if resource.id not in id2iri_mapping:
+ # resource could not be uploaded to DSP, so the stash cannot be uploaded either
+ continue
+ print(f' Upload resptrs of resource "{resource.id}"...')
+ res_iri = id2iri_mapping[resource.id]
+ existing_resource = con.get(path=f'/v2/resources/{quote_plus(res_iri)}')
+ for link_prop, resptrs in prop_2_resptrs.items():
+ for resptr in resptrs.copy():
+ jsonobj = {
+ '@id': res_iri,
+ '@type': resource.restype,
+ f'{link_prop.name}Value': {
+ '@type': 'knora-api:LinkValue',
+ 'knora-api:linkValueHasTargetIri': {
+ '@id': id2iri_mapping[resptr]
+ }
+ },
+ '@context': existing_resource['@context']
+ }
+ jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '))
+ response = try_network_action(
+ object=con,
+ method='post',
+ kwargs={'path': '/v2/values', 'jsondata': jsondata},
+ terminal_output_on_failure=f'ERROR while uploading the resptr prop of "{link_prop.name}" '
+ f'of resource "{resource.id}"'
+ )
+ if not response:
+ continue
+ stashed_resptr_props[resource][link_prop].remove(resptr)
+ if verbose:
+ print(f' Successfully uploaded resptr-prop of "{link_prop.name}"\n'
+ f' Value: {resptr}')
+
+ # make a purged version of stashed_resptr_props, without empty entries
+ nonapplied_resptr_props = purge_stashed_resptr_props(stashed_resptr_props)
+ return nonapplied_resptr_props
+
+
+def try_network_action(
+ terminal_output_on_failure: str,
+ method: Union[str, Callable[..., Any]],
+ object: Optional[Any] = None,
+ kwargs: Optional[dict[str, Any]] = None
+) -> Any:
+ """
+ Helper method that tries 7 times to execute an action. Each time, it catches ConnectionError and
+ requests.exceptions.RequestException, which lead to a waiting time and a retry. The waiting times are 1,
+ 2, 4, 8, 16, 32, 64 seconds. It also catches BaseError and Exception each time, which lead to a message being
+ printed and None being returned.
+ If there is still no success at the end, the message is printed and None is returned.
+
+ Args:
+ terminal_output_on_failure: message to be printed if action cannot be executed
+ method: either a callable to be called on its own, or a method name (as string) to be called on object
+ object: if provided, it must be a python variable/object, accompanied by a method name (as string)
+ kwargs: if provided, a dict with the arguments passed to method
+
+ Returns:
+ the return value of the called method/callable, or None if it could not be executed successfully
+ """
+
+ for i in range(7):
try:
- update_resptr_props(
- resource=resource,
- res_iri=res_iri,
- prop_2_resptrs=prop_2_resptrs,
- res_iri_lookup=res_iri_lookup,
- con=con,
- verbose=verbose
- )
- except BaseError as err:
- print(f'BaseError while updating an XML text of resource "{resource.id}": {err.message}')
+ if object and isinstance(method, str):
+ if not kwargs:
+ return getattr(object, method)()
+ else:
+ return getattr(object, method)(**kwargs)
+ else:
+ if not kwargs:
+ return method()
+ else:
+ return method(**kwargs)
+ except ConnectionError:
+ print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...')
+ time.sleep(2 ** i)
continue
- except Exception as exception:
- print(f'Exception while updating an XML text of resource "{resource.id}": {exception}')
+ except RequestException:
+ print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...')
+ time.sleep(2 ** i)
continue
+ except BaseError:
+ print(terminal_output_on_failure)
+ return None
+ except Exception:
+ print(terminal_output_on_failure)
+ return None
+ print(terminal_output_on_failure)
+ return None
+
- # write mapping of internal IDs to IRIs to file with timestamp
- timestamp_now = datetime.now()
- timestamp_str = timestamp_now.strftime("%Y%m%d-%H%M%S")
+def purge_stashed_resptr_props(
+ stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]]
+) -> dict[XMLResource, dict[XMLProperty, list[str]]]:
+ nonapplied_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}
+ for res, propdict in stashed_resptr_props.items():
+ for prop, resptrs in propdict.items():
+ if len(resptrs) > 0:
+ if res not in nonapplied_resptr_props:
+ nonapplied_resptr_props[res] = {}
+ nonapplied_resptr_props[res][prop] = resptrs
+ return nonapplied_resptr_props
+
+
+def handle_upload_error(
+ err: BaseException,
+ input_file: str,
+ id2iri_mapping: dict[str, str],
+ failed_uploads: list[str],
+ stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+ stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]]
+) -> None:
+ """
+ In case the xmlupload must be interrupted, e.g. because of an error that could not be handled, or due to keyboard
+ interrupt, this method ensures that all information about what is already in DSP is written into log files.
- xml_file_name = Path(input_file).stem
- res_iri_lookup_file = "id2iri_" + xml_file_name + "_mapping_" + timestamp_str + ".json"
- with open(res_iri_lookup_file, "w") as outfile:
- print(f"============\nThe mapping of internal IDs to IRIs was written to {res_iri_lookup_file}")
- outfile.write(json.dumps(res_iri_lookup))
+ It then re-raises the original error, or exits with status 1 if the error is a KeyboardInterrupt.
+ Args:
+ err: error that was the cause of the abort
+ input_file: file name of the original XML file
+ id2iri_mapping: mapping of ids from the XML file to IRIs in DSP (only successful uploads appear here)
+ failed_uploads: resources that caused an error when uploading to DSP
+ stashed_xml_texts: all xml texts that have been stashed
+ stashed_resptr_props: all resptr props that have been stashed
+
+ Returns:
+ None
+ """
+
+ print(f'\n=========================================='
+ f'\nxmlupload must be aborted because of an error')
+ timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
+
+ # write id2iri_mapping of the resources that are already in DSP
+ write_id2iri_mapping(input_file, id2iri_mapping, timestamp_str)
+
+ # Both stashes are purged of resources that have not been uploaded yet. Only stashed properties of resources that
+ # already exist in DSP are of interest.
+ stashed_xml_texts_purged = purge_stashed_xml_texts(
+ {res: propdict for res, propdict in stashed_xml_texts.items() if res.id in id2iri_mapping})
+ if len(stashed_xml_texts_purged) > 0:
+ write_stashed_xml_texts(stashed_xml_texts_purged, timestamp_str)
+
+ stashed_resptr_props_purged = purge_stashed_resptr_props(
+ {res: propdict for res, propdict in stashed_resptr_props.items() if res.id in id2iri_mapping})
+ if len(stashed_resptr_props_purged) > 0:
+ write_stashed_resptr_props(stashed_resptr_props_purged, timestamp_str)
+
+ # print the resources that threw an error during their upload attempt
if failed_uploads:
- print(f"Could not upload the following resources: {failed_uploads}")
+ print(f"Independently of this error, there were some resources that could not be uploaded: "
+ f"{failed_uploads}")
+
+ if isinstance(err, KeyboardInterrupt):
+ exit(1)
+ else:
+ print('The error will now be raised again:\n'
+ '==========================================\n')
+ raise err
+
+
+def write_id2iri_mapping(input_file: str, id2iri_mapping: dict[str, str], timestamp_str: str) -> None:
+ """
+ Write the id2iri mapping into a file. The timestamp must be created by the caller, so that different log files can
+ have an identical timestamp.
+
+ Args:
+ input_file: the file name of the original XML file
+ id2iri_mapping: mapping of ids from the XML file to IRIs in DSP
+ timestamp_str: timestamp for log file identification
+
+ Returns:
+ None
+ """
+
+ id2iri_mapping_file = "id2iri_" + Path(input_file).stem + "_mapping_" + timestamp_str + ".json"
+ with open(id2iri_mapping_file, "w") as outfile:
+ print(f"The mapping of internal IDs to IRIs was written to {id2iri_mapping_file}")
+ outfile.write(json.dumps(id2iri_mapping))
+
+
+def write_stashed_xml_texts(
+ stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+ timestamp_str: str
+) -> None:
+ """
+ Write the stashed_xml_texts into a file. The timestamp must be created by the caller, so that different log files
+ can have an identical timestamp.
+
+ Args:
+ stashed_xml_texts: all xml texts that have been stashed
+ timestamp_str: timestamp for log file identification
+
+ Returns:
+ None
+ """
+
+ filename = f'stashed_text_properties_{timestamp_str}.txt'
+ print(f'There are stashed text properties that could not be reapplied to the resources they were stripped from. '
+ f'They were saved to {filename}')
+ with open(filename, 'a') as f:
+ f.write('Stashed text properties that could not be reapplied\n')
+ f.write('***************************************************\n')
+ f.write('During the xmlupload, some text properties had to be stashed away, because the salsah-links in their '
+ 'XML text formed a circle. The xmlupload can only be done if these circles are broken up, by stashing '
+ 'away some of the chain elements of the circle. \n'
+ 'Some of the resources that have been stripped from some of their text properties have been created in '
+ 'DSP, but the stashed text properties could not be reapplied to them, because the xmlupload was '
+ 'unexpectedly interrupted. \n'
+ 'This file is a list of all text properties that are now missing in DSP. The texts have been replaced '
+ 'by a hash number that now stands in the text field in DSP. \n'
+ '(Not listed are the stripped resources that haven\'t been created in DSP yet.) \n')
+ for res, props in stashed_xml_texts.items():
+ f.write(f'\n{res.id}')
+ f.write('\n' + '=' * len(res.id))
+ for prop, stashed_texts in props.items():
+ if len(stashed_texts) > 0:
+ f.write(f'\n{prop.name}')
+ f.write('\n' + '-' * len(prop.name))
+ for hash, standoff in stashed_texts.items():
+ f.write(f'\ntext with hash {hash}:\n{str(standoff).strip()}\n')
+
+
+def write_stashed_resptr_props(
+ stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]],
+ timestamp_str: str
+) -> None:
+ """
+ Write the stashed_resptr_props into a file. The timestamp must be created by the caller, so that different log files
+ can have an identical timestamp.
+
+ Args:
+ stashed_resptr_props: all resptr props that have been stashed
+ timestamp_str: timestamp for log file identification
+
+ Returns:
+ None
+ """
+
+ filename = f'stashed_resptr_properties_{timestamp_str}.txt'
+ print(f'There are stashed resptr properties that could not be reapplied to the resources they were stripped from. '
+ f'They were saved to {filename}')
+ with open(filename, 'a') as f:
+ f.write('Stashed resptr properties that could not be reapplied\n')
+ f.write('*****************************************************\n')
+ f.write('During the xmlupload, some resptr-props had to be stashed away, because they formed a circle. The '
+ 'xmlupload can only be done if these circles are broken up, by stashing away some of the chain '
+ 'elements of the circle. \n'
+ 'Some of the resources that have been stripped from some of their resptr-props have been created in '
+ 'DSP, but the stashed resptr-props could not be reapplied to them, because the xmlupload was '
+ 'unexpectedly interrupted. \n'
+ 'This file is a list of all resptr-props that are now missing in DSP. (Not listed are the stripped '
+ 'resources that haven\'t been created in DSP yet. \n')
+ for res, props_ in stashed_resptr_props.items():
+ f.write(f'\n{res.id}\n---------\n')
+ for prop, stashed_props in props_.items():
+ f.write(f'{prop.name}\n\t{stashed_props}\n')
diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py
index d4557d450..d13ff3328 100644
--- a/test/e2e/test_tools.py
+++ b/test/e2e/test_tools.py
@@ -176,29 +176,34 @@ def test_create_ontology(self) -> None:
dump=False)
def test_xml_upload(self) -> None:
- xml_upload(input_file=self.test_data_file,
- server=self.server,
- user=self.user,
- password=self.password,
- imgdir=self.imgdir,
- sipi=self.sipi,
- verbose=False,
- validate_only=False,
- incremental=False)
+ result = xml_upload(
+ input_file=self.test_data_file,
+ server=self.server,
+ user=self.user,
+ password=self.password,
+ imgdir=self.imgdir,
+ sipi=self.sipi,
+ verbose=False,
+ validate_only=False,
+ incremental=False)
+ self.assertTrue(result)
mapping_file = ''
for mapping in [x for x in os.scandir('.') if x.name.startswith('id2iri_test-data_mapping_')]:
delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(mapping.stat().st_mtime_ns / 1000000000)
if delta.seconds < 15:
mapping_file = mapping.name
+ self.assertNotEqual(mapping_file, '')
+ id2iri_replaced_xml_filename = 'testdata/tmp/_test-id2iri-replaced.xml'
id_to_iri(xml_file='testdata/test-id2iri-data.xml',
json_file=mapping_file,
- out_file='testdata/tmp/_test-id2iri-replaced.xml',
+ out_file=id2iri_replaced_xml_filename,
verbose=False)
+ self.assertEqual(os.path.isfile(id2iri_replaced_xml_filename), True)
- xml_upload(
- input_file='testdata/tmp/_test-id2iri-replaced.xml',
+ result = xml_upload(
+ input_file=id2iri_replaced_xml_filename,
server=self.server,
user=self.user,
password=self.password,
@@ -208,6 +213,9 @@ def test_xml_upload(self) -> None:
validate_only=False,
incremental=True
)
+ self.assertTrue(result)
+ self.assertTrue(all([not f.name.startswith('stashed_text_properties_') for f in os.scandir('.')]))
+ self.assertTrue(all([not f.name.startswith('stashed_resptr_properties_') for f in os.scandir('.')]))
if __name__ == '__main__':
diff --git a/testdata/bitstreams/Dummy.mp4 b/testdata/bitstreams/Dummy.mp4
index f4cf78c93..18c5513a7 100644
Binary files a/testdata/bitstreams/Dummy.mp4 and b/testdata/bitstreams/Dummy.mp4 differ