diff --git a/.github/workflows/tests-on-push.yml b/.github/workflows/tests-on-push.yml
index b7e77dde5..2e38e89a7 100644
--- a/.github/workflows/tests-on-push.yml
+++ b/.github/workflows/tests-on-push.yml
@@ -108,4 +108,6 @@ jobs:
- name: Install python & poetry, build & install wheel, install pytest
uses: ./.github/actions/setup-from-wheel
- name: distribution tests
- run: .dist-test-venv/bin/pytest test/distribution/
+ run: .dist-test-venv/bin/pytest --noconftest test/distribution/
+ # Reason for the --noconftest flag:
+ # test/conftest.py configures logging, but loguru should not be installed for the distribution tests
diff --git a/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py b/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py
index 6b5f008f4..6729fc460 100644
--- a/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py
+++ b/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py
@@ -1,12 +1,9 @@
import pickle
import sys
-from copy import deepcopy
-from dataclasses import replace
from loguru import logger
from termcolor import colored
-from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
from dsp_tools.commands.xmlupload.list_client import ListClient
from dsp_tools.commands.xmlupload.list_client import ListClientLive
from dsp_tools.commands.xmlupload.models.sipi import Sipi
@@ -36,33 +33,9 @@ def resume_xmlupload(server: str, user: str, password: str, sipi: str, skip_firs
"""
upload_state = _read_upload_state_from_disk(server)
if skip_first_resource:
- if len(upload_state.pending_resources) > 0:
- new_pending = deepcopy(upload_state.pending_resources)
- new_pending.pop(0)
- upload_state = replace(upload_state, pending_resources=new_pending)
- else:
- msg = (
- "The list of pending resources is empty.\n"
- "It is not yet possible to skip the first item of the stashed properties.\n"
- "Do you want to continue with the upload of the stashed properties anyway? [y/n]"
- )
- resp = None
- while resp not in ["y", "n"]:
- resp = input(colored(msg, color="red"))
- if resp == "n":
- sys.exit(1)
-
- previous_successful = len(upload_state.iri_resolver_lookup)
- previous_failed = len(upload_state.failed_uploads)
- previous_total = previous_successful + previous_failed
- msg = (
- f"Resuming upload for project {upload_state.config.shortcode} on server {server}. "
- f"Number of resources uploaded until now: {previous_total}"
- )
- if previous_failed:
- msg += f" ({previous_failed} of them failed)"
- logger.info(msg)
- print("\n==========================\n" + msg + "\n==========================\n")
+ _skip_first_resource(upload_state)
+
+ _print_and_log(upload_state, server)
con = ConnectionLive(server)
con.login(user, password)
@@ -72,21 +45,15 @@ def resume_xmlupload(server: str, user: str, password: str, sipi: str, skip_firs
project_client: ProjectClient = ProjectClientLive(con, upload_state.config.shortcode)
list_client: ListClient = ListClientLive(con, project_client.get_project_iri())
- iri_resolver, failed_uploads, nonapplied_stash = upload_resources(
- resources=upload_state.pending_resources,
- failed_uploads=upload_state.failed_uploads,
+ upload_resources(
+ upload_state=upload_state,
imgdir=".",
sipi_server=sipi_server,
- permissions_lookup=upload_state.permissions_lookup,
- con=con,
- stash=upload_state.pending_stash,
- config=upload_state.config,
project_client=project_client,
list_client=list_client,
- iri_resolver=IriResolver(upload_state.iri_resolver_lookup),
)
- return cleanup_upload(iri_resolver, upload_state.config, failed_uploads, nonapplied_stash)
+ return cleanup_upload(upload_state)
def _read_upload_state_from_disk(server: str) -> UploadState:
@@ -94,3 +61,33 @@ def _read_upload_state_from_disk(server: str) -> UploadState:
with open(save_location, "rb") as f:
saved_state: UploadState = pickle.load(f) # noqa: S301 (deserialization of untrusted data)
return saved_state
+
+
+def _skip_first_resource(upload_state: UploadState) -> None:
+ if len(upload_state.pending_resources) > 0:
+ upload_state.pending_resources.pop(0)
+ else:
+ msg = (
+ "The list of pending resources is empty.\n"
+ "It is not yet possible to skip the first item of the stashed properties.\n"
+ "Do you want to continue with the upload of the stashed properties anyway? [y/n]"
+ )
+ resp = None
+ while resp not in ["y", "n"]:
+ resp = input(colored(msg, color="red"))
+ if resp == "n":
+ sys.exit(1)
+
+
+def _print_and_log(upload_state: UploadState, server: str) -> None:
+ previous_successful = len(upload_state.iri_resolver.lookup)
+ previous_failed = len(upload_state.failed_uploads)
+ previous_total = previous_successful + previous_failed
+ msg = (
+ f"Resuming upload for project {upload_state.config.shortcode} on server {server}. "
+ f"Number of resources uploaded until now: {previous_total}"
+ )
+ if previous_failed:
+ msg += f" ({previous_failed} of them failed)"
+ logger.info(msg)
+ print("\n==========================\n" + msg + "\n==========================\n")
diff --git a/src/dsp_tools/commands/xmlupload/models/upload_state.py b/src/dsp_tools/commands/xmlupload/models/upload_state.py
index 73b212c0c..a4c6b6b54 100644
--- a/src/dsp_tools/commands/xmlupload/models/upload_state.py
+++ b/src/dsp_tools/commands/xmlupload/models/upload_state.py
@@ -1,12 +1,13 @@
from dataclasses import dataclass
+from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
from dsp_tools.commands.xmlupload.models.permission import Permissions
from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource
from dsp_tools.commands.xmlupload.stash.stash_models import Stash
from dsp_tools.commands.xmlupload.upload_config import UploadConfig
-@dataclass(frozen=True)
+@dataclass
class UploadState:
"""
Save the state of an xmlupload, so that after an interruption, it can be resumed.
@@ -14,7 +15,7 @@ class UploadState:
pending_resources: list[XMLResource]
failed_uploads: list[str]
- iri_resolver_lookup: dict[str, str]
+ iri_resolver: IriResolver
pending_stash: Stash | None
config: UploadConfig
permissions_lookup: dict[str, Permissions]
diff --git a/src/dsp_tools/commands/xmlupload/project_client.py b/src/dsp_tools/commands/xmlupload/project_client.py
index 74854efd3..f8e70b8e3 100644
--- a/src/dsp_tools/commands/xmlupload/project_client.py
+++ b/src/dsp_tools/commands/xmlupload/project_client.py
@@ -19,6 +19,10 @@ class ProjectInfo:
class ProjectClient(Protocol):
"""Interface (protocol) for project-related requests to the DSP-API."""
+ con: Connection
+ shortcode: str
+ project_info: ProjectInfo | None
+
def get_project_iri(self) -> str:
"""Get the IRI of the project to which the data is being uploaded."""
diff --git a/src/dsp_tools/commands/xmlupload/resource_create_client.py b/src/dsp_tools/commands/xmlupload/resource_create_client.py
index 3dc543c84..9390d4bd9 100644
--- a/src/dsp_tools/commands/xmlupload/resource_create_client.py
+++ b/src/dsp_tools/commands/xmlupload/resource_create_client.py
@@ -3,6 +3,7 @@
from pathlib import Path
from typing import Any
from typing import assert_never
+from typing import cast
from loguru import logger
@@ -37,17 +38,15 @@ def create_resource(
self,
resource: XMLResource,
bitstream_information: BitstreamInfo | None,
- ) -> tuple[str, str]:
- """Creates a resource on the DSP server."""
+ ) -> str:
+ """Creates a resource on the DSP server, and returns its IRI"""
logger.info(
f"Attempting to create resource {resource.res_id} (label: {resource.label}, iri: {resource.iri})..."
)
resource_dict = self._make_resource_with_values(resource, bitstream_information)
headers = {"X-Asset-Ingested": "true"} if self.media_previously_ingested else None
res = self.con.post(route="/v2/resources", data=resource_dict, headers=headers)
- iri = res["@id"]
- label = res["rdfs:label"]
- return iri, label
+ return cast(str, res["@id"])
def _make_resource_with_values(
self,
diff --git a/src/dsp_tools/commands/xmlupload/stash/stash_models.py b/src/dsp_tools/commands/xmlupload/stash/stash_models.py
index ae1a57aff..90e1b4f5c 100644
--- a/src/dsp_tools/commands/xmlupload/stash/stash_models.py
+++ b/src/dsp_tools/commands/xmlupload/stash/stash_models.py
@@ -106,3 +106,9 @@ def make(standoff_stash: StandoffStash | None, link_value_stash: LinkValueStash
if standoff_stash or link_value_stash:
return Stash(standoff_stash, link_value_stash)
return None
+
+ def is_empty(self) -> bool:
+ """Check if there are any stashed items in this stash"""
+ standoff = not self.standoff_stash or not self.standoff_stash.res_2_stash_items
+ link = not self.link_value_stash or not self.link_value_stash.res_2_stash_items
+ return standoff and link
diff --git a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py
index 43b3b9068..ad3c2ec7f 100644
--- a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py
+++ b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py
@@ -2,40 +2,39 @@
from datetime import datetime
from typing import Any
+from typing import cast
from loguru import logger
-from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
+from dsp_tools.commands.xmlupload.models.upload_state import UploadState
from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash
from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem
+from dsp_tools.commands.xmlupload.stash.stash_models import Stash
from dsp_tools.models.exceptions import BaseError
from dsp_tools.utils.connection import Connection
def upload_stashed_resptr_props(
- iri_resolver: IriResolver,
+ upload_state: UploadState,
con: Connection,
- stashed_resptr_props: LinkValueStash,
context: dict[str, str],
-) -> LinkValueStash | None:
+) -> None:
"""
After all resources are uploaded, the stashed resptr props must be applied to their resources in DSP.
+ The upload state is updated accordingly, as a side effect.
Args:
- iri_resolver: resolver with a mapping of ids from the XML file to IRIs in DSP
+ upload_state: the current state of the upload
con: connection to DSP
- stashed_resptr_props: all resptr props that have been stashed
context: the JSON-LD context of the resource
-
- Returns:
- nonapplied_resptr_props: the resptr props that could not be uploaded
"""
print(f"{datetime.now()}: Upload the stashed resptrs...")
logger.info("Upload the stashed resptrs...")
- not_uploaded: list[LinkValueStashItem] = []
- for res_id, stash_items in stashed_resptr_props.res_2_stash_items.copy().items():
- res_iri = iri_resolver.get(res_id)
+ upload_state.pending_stash = cast(Stash, upload_state.pending_stash)
+ link_value_stash = cast(LinkValueStash, upload_state.pending_stash.link_value_stash)
+ for res_id, stash_items in link_value_stash.res_2_stash_items.copy().items():
+ res_iri = upload_state.iri_resolver.get(res_id)
if not res_iri:
# resource could not be uploaded to DSP, so the stash cannot be uploaded either
# no action necessary: this resource will remain in nonapplied_resptr_props,
@@ -44,16 +43,13 @@ def upload_stashed_resptr_props(
print(f"{datetime.now()}: Upload resptrs of resource '{res_id}'...")
logger.info(f" Upload resptrs of resource '{res_id}'...")
for stash_item in stash_items:
- target_iri = iri_resolver.get(stash_item.target_id)
+ target_iri = upload_state.iri_resolver.get(stash_item.target_id)
if not target_iri:
continue
if _upload_stash_item(stash_item, res_iri, target_iri, con, context):
- stashed_resptr_props.res_2_stash_items[res_id].remove(stash_item)
- else:
- not_uploaded.append(stash_item)
- if not stashed_resptr_props.res_2_stash_items[res_id]:
- del stashed_resptr_props.res_2_stash_items[res_id]
- return LinkValueStash.make(not_uploaded)
+ link_value_stash.res_2_stash_items[res_id].remove(stash_item)
+ if not link_value_stash.res_2_stash_items[res_id]:
+ del link_value_stash.res_2_stash_items[res_id]
def _upload_stash_item(
diff --git a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py
index 9dbb02a29..1cc507052 100644
--- a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py
+++ b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py
@@ -2,14 +2,17 @@
from datetime import datetime
from typing import Any
+from typing import cast
from urllib.parse import quote_plus
from loguru import logger
from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
+from dsp_tools.commands.xmlupload.models.upload_state import UploadState
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem
+from dsp_tools.commands.xmlupload.stash.stash_models import Stash
from dsp_tools.models.exceptions import BaseError
from dsp_tools.utils.connection import Connection
@@ -93,28 +96,22 @@ def _create_XMLResource_json_object_to_update(
return jsonobj
-def upload_stashed_xml_texts(
- iri_resolver: IriResolver,
- con: Connection,
- stashed_xml_texts: StandoffStash,
-) -> StandoffStash | None:
+def upload_stashed_xml_texts(upload_state: UploadState, con: Connection) -> None:
"""
After all resources are uploaded, the stashed xml texts must be applied to their resources in DSP.
+ The upload state is updated accordingly, as a side effect.
Args:
- iri_resolver: resolver to map ids from the XML file to IRIs in DSP
+ upload_state: the current state of the upload
con: connection to DSP
- stashed_xml_texts: all xml texts that have been stashed
-
- Returns:
- the xml texts that could not be uploaded
"""
print(f"{datetime.now()}: Upload the stashed XML texts...")
logger.info("Upload the stashed XML texts...")
- not_uploaded: list[StandoffStashItem] = []
- for res_id, stash_items in stashed_xml_texts.res_2_stash_items.copy().items():
- res_iri = iri_resolver.get(res_id)
+ upload_state.pending_stash = cast(Stash, upload_state.pending_stash)
+ standoff_stash = cast(StandoffStash, upload_state.pending_stash.standoff_stash)
+ for res_id, stash_items in standoff_stash.res_2_stash_items.copy().items():
+ res_iri = upload_state.iri_resolver.get(res_id)
if not res_iri:
# resource could not be uploaded to DSP, so the stash cannot be uploaded either
# no action necessary: this resource will remain in the list of not uploaded stash items,
@@ -131,25 +128,20 @@ def upload_stashed_xml_texts(
for stash_item in stash_items:
value_iri = _get_value_iri(stash_item.prop_name, resource_in_triplestore, stash_item.uuid)
if not value_iri:
- not_uploaded.append(stash_item)
continue
- success = _upload_stash_item(
+ if _upload_stash_item(
stash_item=stash_item,
res_iri=res_iri,
res_type=stash_item.res_type,
res_id=res_id,
value_iri=value_iri,
- iri_resolver=iri_resolver,
+ iri_resolver=upload_state.iri_resolver,
con=con,
context=context,
- )
- if success:
- stashed_xml_texts.res_2_stash_items[res_id].remove(stash_item)
- else:
- not_uploaded.append(stash_item)
- if not stashed_xml_texts.res_2_stash_items[res_id]:
- stashed_xml_texts.res_2_stash_items.pop(res_id)
- return StandoffStash.make(not_uploaded)
+ ):
+ standoff_stash.res_2_stash_items[res_id].remove(stash_item)
+ if not standoff_stash.res_2_stash_items[res_id]:
+ standoff_stash.res_2_stash_items.pop(res_id)
def _get_value_iri(
diff --git a/src/dsp_tools/commands/xmlupload/xmlupload.py b/src/dsp_tools/commands/xmlupload/xmlupload.py
index d7a251d80..048547636 100644
--- a/src/dsp_tools/commands/xmlupload/xmlupload.py
+++ b/src/dsp_tools/commands/xmlupload/xmlupload.py
@@ -2,6 +2,7 @@
import pickle
import sys
+import warnings
from datetime import datetime
from pathlib import Path
from typing import Any
@@ -32,6 +33,7 @@
from dsp_tools.commands.xmlupload.stash.upload_stashed_xml_texts import upload_stashed_xml_texts
from dsp_tools.commands.xmlupload.upload_config import UploadConfig
from dsp_tools.commands.xmlupload.write_diagnostic_info import write_id2iri_mapping
+from dsp_tools.models.custom_warnings import DspToolsUserWarning
from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.exceptions import PermanentTimeOutError
from dsp_tools.models.exceptions import UserError
@@ -105,60 +107,51 @@ def xmlupload(
project_client: ProjectClient = ProjectClientLive(con, config.shortcode)
list_client: ListClient = ListClientLive(con, project_client.get_project_iri())
- iri_resolver = IriResolver()
+ upload_state = UploadState(resources, [], IriResolver(), stash, config, permissions_lookup)
- iri_resolver, failed_uploads, nonapplied_stash = upload_resources(
- resources=resources,
- failed_uploads=[],
+ upload_resources(
+ upload_state=upload_state,
imgdir=imgdir,
sipi_server=sipi_server,
- permissions_lookup=permissions_lookup,
- con=con,
- stash=stash,
- config=config,
project_client=project_client,
list_client=list_client,
- iri_resolver=iri_resolver,
)
- return cleanup_upload(iri_resolver, config, failed_uploads, nonapplied_stash)
+ return cleanup_upload(upload_state)
-def cleanup_upload(
- iri_resolver: IriResolver,
- config: UploadConfig,
- failed_uploads: list[str],
- nonapplied_stash: Stash | None,
-) -> bool:
+def cleanup_upload(upload_state: UploadState) -> bool:
"""
Write the id2iri mapping to a file and print a message to the console.
Args:
- iri_resolver: mapping from internal IDs to IRIs
- config: the upload configuration
- failed_uploads: resources that caused an error when uploading to DSP
- nonapplied_stash: the stash items that could not be reapplied
+ upload_state: the current state of the upload
Returns:
- success status (deduced from failed_uploads)
+ success status (deduced from failed_uploads and non-applied stash)
"""
- write_id2iri_mapping(iri_resolver.lookup, config.diagnostics)
- if not failed_uploads and not nonapplied_stash:
+ write_id2iri_mapping(upload_state.iri_resolver.lookup, upload_state.config.diagnostics)
+ has_stash_failed = upload_state.pending_stash and not upload_state.pending_stash.is_empty()
+ if not upload_state.failed_uploads and not has_stash_failed:
success = True
print(f"{datetime.now()}: All resources have successfully been uploaded.")
logger.info("All resources have successfully been uploaded.")
else:
success = False
- if failed_uploads:
- print(f"\n{datetime.now()}: WARNING: Could not upload the following resources: {failed_uploads}\n")
+ if upload_state.failed_uploads:
+ res_msg = f"Could not upload the following resources: {upload_state.failed_uploads}"
+ print(f"\n{datetime.now()}: WARNING: {res_msg}\n")
print(f"For more information, see the log file: {logger_savepath}\n")
- logger.warning(f"Could not upload the following resources: {failed_uploads}")
- if nonapplied_stash:
- print(f"\n{datetime.now()}: WARNING: Could not reapply the following stash items: {nonapplied_stash}\n")
+ logger.warning(res_msg)
+ if has_stash_failed:
+ stash_msg = f"Could not reapply the following stash items: {upload_state.pending_stash}"
+ print(f"\n{datetime.now()}: WARNING: {stash_msg}\n")
print(f"For more information, see the log file: {logger_savepath}\n")
- logger.warning(f"Could not reapply the following stash items: {nonapplied_stash}")
+ logger.warning(stash_msg)
+ msg = _save_upload_state(upload_state)
+ print(msg)
- config.diagnostics.save_location.unlink(missing_ok=True)
+ upload_state.config.diagnostics.save_location.unlink(missing_ok=True)
return success
@@ -185,90 +178,34 @@ def _prepare_upload(
def upload_resources(
- resources: list[XMLResource],
- failed_uploads: list[str],
+ upload_state: UploadState,
imgdir: str,
sipi_server: Sipi,
- permissions_lookup: dict[str, Permissions],
- con: Connection,
- stash: Stash | None,
- config: UploadConfig,
project_client: ProjectClient,
list_client: ListClient,
- iri_resolver: IriResolver,
-) -> tuple[IriResolver, list[str], Stash | None]:
+) -> None:
"""
Actual upload of all resources to DSP.
Args:
- resources: list of XMLResources to upload to DSP
- failed_uploads: resources that caused an error in a previous upload
+ upload_state: the current state of the upload
imgdir: folder containing the multimedia files
sipi_server: Sipi instance
- permissions_lookup: dictionary that contains the permission name as string and the corresponding Python object
- con: connection to the DSP server
- stash: an object that contains all stashed links that could not be reapplied to their resources
- config: the upload configuration
project_client: a client for HTTP communication with the DSP-API
list_client: a client for HTTP communication with the DSP-API
- iri_resolver: mapping from internal IDs to IRIs
-
- Returns:
- the id2iri mapping of the uploaded resources,
- a list of resources that could not be uploaded,
- and the stash items that could not be reapplied.
"""
try:
- iri_resolver, failed_uploads = _upload_resources(
- resources=resources,
- failed_uploads=failed_uploads,
+ _upload_resources(
+ upload_state=upload_state,
imgdir=imgdir,
sipi_server=sipi_server,
- permissions_lookup=permissions_lookup,
- con=con,
- config=config,
project_client=project_client,
list_client=list_client,
- iri_resolver=iri_resolver,
- )
- except BaseException as err: # noqa: BLE001 (blind-except)
- # The forseeable errors are already handled by failed_uploads
- # Here we catch the unforseeable exceptions, incl. keyboard interrupt.
- _handle_upload_error(
- err=err,
- iri_resolver=iri_resolver,
- pending_resources=resources,
- failed_uploads=failed_uploads,
- pending_stash=stash,
- config=config,
- permissions_lookup=permissions_lookup,
- )
-
- nonapplied_stash = None
- try:
- nonapplied_stash = (
- _upload_stash(
- stash=stash,
- iri_resolver=iri_resolver,
- con=con,
- project_client=project_client,
- )
- if stash
- else None
- )
- except BaseException as err: # noqa: BLE001 (blind-except)
- # The forseeable errors are already handled by failed_uploads and nonapplied_stash.
- # Here we catch the unforseeable exceptions, incl. keyboard interrupt.
- _handle_upload_error(
- err=err,
- iri_resolver=iri_resolver,
- pending_resources=resources,
- failed_uploads=failed_uploads,
- pending_stash=stash,
- config=config,
- permissions_lookup=permissions_lookup,
)
- return iri_resolver, failed_uploads, nonapplied_stash
+ if upload_state.pending_stash:
+ _upload_stash(upload_state, project_client)
+ except XmlUploadInterruptedError as err:
+ _handle_upload_error(err, upload_state)
def _get_data_from_xml(
@@ -284,30 +221,14 @@ def _get_data_from_xml(
def _upload_stash(
- stash: Stash,
- iri_resolver: IriResolver,
- con: Connection,
+ upload_state: UploadState,
project_client: ProjectClient,
-) -> Stash | None:
- if stash.standoff_stash:
- nonapplied_standoff = upload_stashed_xml_texts(
- iri_resolver=iri_resolver,
- con=con,
- stashed_xml_texts=stash.standoff_stash,
- )
- else:
- nonapplied_standoff = None
+) -> None:
+ if upload_state.pending_stash and upload_state.pending_stash.standoff_stash:
+ upload_stashed_xml_texts(upload_state, project_client.con)
context = get_json_ld_context_for_project(project_client.get_ontology_name_dict())
- if stash.link_value_stash:
- nonapplied_resptr_props = upload_stashed_resptr_props(
- iri_resolver=iri_resolver,
- con=con,
- stashed_resptr_props=stash.link_value_stash,
- context=context,
- )
- else:
- nonapplied_resptr_props = None
- return Stash.make(nonapplied_standoff, nonapplied_resptr_props)
+ if upload_state.pending_stash and upload_state.pending_stash.link_value_stash:
+ upload_stashed_resptr_props(upload_state, project_client.con, context)
def _get_project_context_from_server(connection: Connection) -> ProjectContext:
@@ -343,129 +264,137 @@ def _extract_resources_from_xml(root: etree._Element, default_ontology: str) ->
def _upload_resources(
- resources: list[XMLResource],
- failed_uploads: list[str],
+ upload_state: UploadState,
imgdir: str,
sipi_server: Sipi,
- permissions_lookup: dict[str, Permissions],
- con: Connection,
- config: UploadConfig,
project_client: ProjectClient,
list_client: ListClient,
- iri_resolver: IriResolver,
-) -> tuple[IriResolver, list[str]]:
+) -> None:
"""
Iterates through all resources and tries to upload them to DSP.
If a temporary exception occurs, the action is repeated until success,
and if a permanent exception occurs, the resource is skipped.
Args:
- resources: list of XMLResources to upload to DSP
- failed_uploads: resources that caused an error in a previous upload
+ upload_state: the current state of the upload
imgdir: folder containing the multimedia files
sipi_server: Sipi instance
- permissions_lookup: maps permission strings to Permission objects
- con: connection to DSP
- config: the upload configuration
project_client: a client for HTTP communication with the DSP-API
list_client: a client for HTTP communication with the DSP-API
- iri_resolver: mapping from internal IDs to IRIs
Raises:
BaseException: in case of an unhandled exception during resource creation
XmlUploadInterruptedError: if the number of resources created is equal to the interrupt_after value
-
- Returns:
- id2iri_mapping, failed_uploads
"""
project_iri = project_client.get_project_iri()
json_ld_context = get_json_ld_context_for_project(project_client.get_ontology_name_dict())
listnode_lookup = list_client.get_list_node_id_to_iri_lookup()
resource_create_client = ResourceCreateClient(
- con=con,
+ con=project_client.con,
project_iri=project_iri,
- iri_resolver=iri_resolver,
+ iri_resolver=upload_state.iri_resolver,
json_ld_context=json_ld_context,
- permissions_lookup=permissions_lookup,
+ permissions_lookup=upload_state.permissions_lookup,
listnode_lookup=listnode_lookup,
- media_previously_ingested=config.media_previously_uploaded,
+ media_previously_ingested=upload_state.config.media_previously_uploaded,
)
- total_res = len(resources) + len(iri_resolver.lookup)
- previous_successful = len(iri_resolver.lookup)
- previous_failed = len(failed_uploads)
- previous_total = previous_successful + previous_failed
- # if the interrupt_after value is not set, the upload will not be interrupted
- interrupt_after = config.interrupt_after or total_res + 1
+ for creation_attempts_of_this_round, resource in enumerate(upload_state.pending_resources.copy()):
+ _upload_one_resource(
+ upload_state=upload_state,
+ resource=resource,
+ imgdir=imgdir,
+ sipi_server=sipi_server,
+ resource_create_client=resource_create_client,
+ creation_attempts_of_this_round=creation_attempts_of_this_round,
+ )
+
- for i, resource in enumerate(resources.copy()):
- current_res = i + 1 + previous_total
- if i >= interrupt_after:
- raise XmlUploadInterruptedError(f"Interrupted: Maximum number of resources was reached ({interrupt_after})")
+def _upload_one_resource(
+ upload_state: UploadState,
+ resource: XMLResource,
+ imgdir: str,
+ sipi_server: Sipi,
+ resource_create_client: ResourceCreateClient,
+ creation_attempts_of_this_round: int,
+) -> None:
+ try:
success, media_info = handle_media_info(
- resource, config.media_previously_uploaded, sipi_server, imgdir, permissions_lookup
+ resource,
+ upload_state.config.media_previously_uploaded,
+ sipi_server,
+ imgdir,
+ upload_state.permissions_lookup,
)
if not success:
- failed_uploads.append(resource.res_id)
- continue
-
- res = None
- try:
- res = _create_resource(resource, media_info, resource_create_client)
- if res == (None, None):
- # resource creation failed gracefully: register it as failed, then continue
- failed_uploads.append(resource.res_id)
- continue
- else:
- # resource creation succeeded: update the iri_resolver and remove the resource from the list
- iri, label = res
- _tidy_up_resource_creation(iri, label, iri_resolver, resource, current_res, total_res) # type: ignore[arg-type]
- except PermanentTimeOutError:
- msg = (
- f"There was a timeout while trying to create resource '{resource.res_id}'.\n"
- f"It is unclear if the resource '{resource.res_id}' was created successfully or not.\n"
- f"Please check manually in the DSP-APP or DB.\n"
- f"In case of successful creation, call 'resume-xmlupload' with the flag "
- f"'--skip-first-resource' to prevent duplication.\n"
- f"If not, a normal 'resume-xmlupload' can be started."
- )
- logger.error(msg)
- raise XmlUploadInterruptedError(msg)
- except BaseException as err: # noqa: BLE001 (blind-except)
- if res and res[0]:
- # creation succeeded, but during tidy up, a Keyboard Interrupt occurred. tidy up again before escalating
- iri, label = res
- _tidy_up_resource_creation(iri, label, iri_resolver, resource, current_res, total_res)
- else:
- # unhandled exception during resource creation
- failed_uploads.append(resource.res_id)
- raise err from None
- finally:
- resources.remove(resource)
-
- return iri_resolver, failed_uploads
-
-
-def _tidy_up_resource_creation(
- iri: str,
- label: str,
- iri_resolver: IriResolver,
+ upload_state.failed_uploads.append(resource.res_id)
+ return
+ except KeyboardInterrupt:
+ raise XmlUploadInterruptedError("KeyboardInterrupt during media file upload") from None
+
+ try:
+ iri = _create_resource(resource, media_info, resource_create_client)
+ except (PermanentTimeOutError, KeyboardInterrupt) as err:
+ warnings.warn(DspToolsUserWarning(f"{type(err).__name__}: Tidying up, then exit..."))
+ msg = (
+ f"There was a {type(err).__name__} while trying to create resource '{resource.res_id}'.\n"
+ f"It is unclear if the resource '{resource.res_id}' was created successfully or not.\n"
+ f"Please check manually in the DSP-APP or DB.\n"
+ f"In case of successful creation, call 'resume-xmlupload' with the flag "
+ f"'--skip-first-resource' to prevent duplication.\n"
+ f"If not, a normal 'resume-xmlupload' can be started."
+ )
+ logger.error(msg)
+ raise XmlUploadInterruptedError(msg) from None
+
+ try:
+ _tidy_up_resource_creation_idempotent(upload_state, iri, resource)
+ _interrupt_if_indicated(upload_state, creation_attempts_of_this_round)
+ except KeyboardInterrupt:
+ warnings.warn(DspToolsUserWarning("KeyboardInterrupt: Tidying up, then exit..."))
+ _tidy_up_resource_creation_idempotent(upload_state, iri, resource)
+ raise XmlUploadInterruptedError("KeyboardInterrupt during tidy up") from None
+
+
+def _interrupt_if_indicated(upload_state: UploadState, creation_attempts_of_this_round: int) -> None:
+ # if the interrupt_after value is not set, the upload will not be interrupted
+ interrupt_after = upload_state.config.interrupt_after or 999_999_999
+ if creation_attempts_of_this_round + 1 >= interrupt_after:
+ msg = f"Interrupted: Maximum number of resources was reached ({upload_state.config.interrupt_after})"
+ raise XmlUploadInterruptedError(msg)
+
+
+def _tidy_up_resource_creation_idempotent(
+ upload_state: UploadState,
+ iri: str | None,
resource: XMLResource,
- current_res: int,
- total_res: int,
) -> None:
- iri_resolver.update(resource.res_id, iri)
- resource_designation = f"'{label}' (ID: '{resource.res_id}', IRI: '{iri}')"
- print(f"{datetime.now()}: Created resource {current_res}/{total_res}: {resource_designation}")
- logger.info(f"Created resource {current_res}/{total_res}: {resource_designation}")
+ previous_successful = len(upload_state.iri_resolver.lookup)
+ previous_failed = len(upload_state.failed_uploads)
+ upcoming = len(upload_state.pending_resources)
+ current_res = previous_successful + previous_failed + 1
+ total_res = previous_successful + previous_failed + upcoming
+ if iri:
+ # resource creation succeeded: update the iri_resolver
+ upload_state.iri_resolver.lookup[resource.res_id] = iri
+ msg = f"Created resource {current_res}/{total_res}: '{resource.label}' (ID: '{resource.res_id}', IRI: '{iri}')"
+ print(f"{datetime.now()}: {msg}")
+ logger.info(msg)
+ else: # noqa: PLR5501
+ # resource creation failed gracefully: register it as failed
+ if resource.res_id not in upload_state.failed_uploads:
+ upload_state.failed_uploads.append(resource.res_id)
+
+ if resource in upload_state.pending_resources:
+ upload_state.pending_resources.remove(resource)
def _create_resource(
resource: XMLResource,
bitstream_information: BitstreamInfo | None,
resource_create_client: ResourceCreateClient,
-) -> tuple[str, str] | tuple[None, None]:
+) -> str | None:
try:
return resource_create_client.create_resource(resource, bitstream_information)
except PermanentTimeOutError as err:
@@ -484,18 +413,10 @@ def _create_resource(
f"Property details:\n" + "\n".join([str(vars(prop)) for prop in resource.properties])
)
logger.exception(log_msg)
- return None, None
+ return None
-def _handle_upload_error(
- err: BaseException,
- iri_resolver: IriResolver,
- pending_resources: list[XMLResource],
- failed_uploads: list[str],
- pending_stash: Stash | None,
- config: UploadConfig,
- permissions_lookup: dict[str, Permissions],
-) -> None:
+def _handle_upload_error(err: BaseException, upload_state: UploadState) -> None:
"""
In case the xmlupload must be interrupted,
e.g. because of an error that could not be handled,
@@ -508,12 +429,7 @@ def _handle_upload_error(
Args:
err: the error that was the cause of the abort
- iri_resolver: a resolver for internal IDs to IRIs
- pending_resources: resources that were not uploaded to DSP
- failed_uploads: resources that caused an error when uploading to DSP
- pending_stash: an object that contains all stashed links that could not yet be reapplied to their resources
- config: the upload configuration
- permissions_lookup: dictionary that contains the permission name as string and the corresponding Python object
+ upload_state: the current state of the upload
"""
if isinstance(err, XmlUploadInterruptedError):
msg = "\n==========================================\n" + err.message + "\n"
@@ -527,13 +443,10 @@ def _handle_upload_error(
)
exit_code = 1
- upload_state = UploadState(
- pending_resources, failed_uploads, iri_resolver.lookup, pending_stash, config, permissions_lookup
- )
msg += _save_upload_state(upload_state)
- if failed_uploads:
- msg += f"Independently from this, there were some resources that could not be uploaded: {failed_uploads}\n"
+ if failed := upload_state.failed_uploads:
+ msg += f"Independently from this, there were some resources that could not be uploaded: {failed}\n"
if exit_code == 1:
logger.exception(msg)
@@ -550,4 +463,5 @@ def _save_upload_state(upload_state: UploadState) -> str:
save_location.touch(exist_ok=True)
with open(save_location, "wb") as file:
pickle.dump(upload_state, file)
+ logger.info(f"Saved the current upload state to {save_location}")
return f"Saved the current upload state to {save_location}.\n"
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 000000000..36c4951ab
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,38 @@
+import logging
+from typing import Iterator
+
+import pytest
+from _pytest.logging import caplog as _caplog # noqa: F401 (imported but unused)
+from loguru import logger
+
+from dsp_tools.utils.logger_config import logger_config
+
+
+@pytest.fixture()
+def caplog(_caplog: pytest.LogCaptureFixture) -> Iterator[pytest.LogCaptureFixture]: # noqa: F811 (redefinition)
+ """
+ The caplog fixture that comes shipped with pytest does not support loguru.
+ This modified version can be used exactly like the builtin caplog fixture,
+ which is documented at https://docs.pytest.org/en/latest/how-to/logging.html#caplog-fixture.
+ Credits: https://www.youtube.com/watch?v=eFdVlyAGeZU
+
+ Yields:
+ pytest.LogCaptureFixture: The modified caplog fixture.
+ """
+
+ class PropagateHandler(logging.Handler):
+ def emit(self, record: logging.LogRecord) -> None:
+ logging.getLogger(record.name).handle(record)
+
+ handler_id = logger.add(sink=PropagateHandler(), format="{message}", level="DEBUG")
+ yield _caplog
+ logger.remove(handler_id)
+
+
+def pytest_sessionstart() -> None:
+ """
+ Called after the Session object has been created
+ and before performing collection and entering the run test loop.
+ See https://docs.pytest.org/en/latest/reference/reference.html#pytest.hookspec.pytest_sessionstart.
+ """
+ logger_config()
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
deleted file mode 100644
index e28dc796b..000000000
--- a/test/e2e/conftest.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from dsp_tools.utils.logger_config import logger_config
-
-
-def pytest_sessionstart() -> None:
- """
- Called after the Session object has been created
- and before performing collection and entering the run test loop.
- See https://docs.pytest.org/en/latest/reference/reference.html#pytest.hookspec.pytest_sessionstart.
- """
- logger_config()
diff --git a/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py b/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py
index 8dddf9893..2f419fe6b 100644
--- a/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py
+++ b/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py
@@ -6,11 +6,14 @@
from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
+from dsp_tools.commands.xmlupload.models.upload_state import UploadState
+from dsp_tools.commands.xmlupload.project_client import ProjectInfo
from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash
from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem
from dsp_tools.commands.xmlupload.stash.stash_models import Stash
+from dsp_tools.commands.xmlupload.upload_config import UploadConfig
from dsp_tools.commands.xmlupload.xmlupload import _upload_stash
from dsp_tools.utils.connection import Connection
@@ -18,9 +21,14 @@
# ruff: noqa: D102 (undocumented-public-method)
+@dataclass
class ProjectClientStub:
"""Stub class for ProjectClient."""
+ con: Connection
+ shortcode: str
+ project_info: ProjectInfo | None
+
def get_project_iri(self) -> str:
raise NotImplementedError("get_project_iri not implemented")
@@ -87,13 +95,9 @@ def test_upload_link_value_stash(self) -> None:
}
)
con: Connection = ConnectionMock(post_responses=[{}])
- nonapplied = _upload_stash(
- stash=stash,
- iri_resolver=iri_resolver,
- con=con,
- project_client=ProjectClientStub(),
- )
- assert nonapplied is None
+ upload_state = UploadState([], [], iri_resolver, stash, UploadConfig(), {})
+ _upload_stash(upload_state, ProjectClientStub(con, "1234", None))
+ assert not upload_state.pending_stash or upload_state.pending_stash.is_empty()
class TestUploadTextValueStashes:
@@ -136,13 +140,9 @@ def test_upload_text_value_stash(self) -> None:
],
put_responses=[{}],
)
- nonapplied = _upload_stash(
- stash=stash,
- iri_resolver=iri_resolver,
- con=con,
- project_client=ProjectClientStub(),
- )
- assert nonapplied is None
+ upload_state = UploadState([], [], iri_resolver, stash, UploadConfig(), {})
+ _upload_stash(upload_state, ProjectClientStub(con, "1234", None))
+ assert not upload_state.pending_stash or upload_state.pending_stash.is_empty()
def test_not_upload_text_value_stash_if_uuid_not_on_value(self) -> None:
"""
@@ -182,10 +182,6 @@ def test_not_upload_text_value_stash_if_uuid_not_on_value(self) -> None:
],
put_responses=[{}],
)
- nonapplied = _upload_stash(
- stash=stash,
- iri_resolver=iri_resolver,
- con=con,
- project_client=ProjectClientStub(),
- )
- assert nonapplied == stash
+ upload_state = UploadState([], [], iri_resolver, stash, UploadConfig(), {})
+ _upload_stash(upload_state, ProjectClientStub(con, "1234", None))
+ assert upload_state.pending_stash == stash
diff --git a/test/integration/commands/xmlupload/test_resource_creation.py b/test/integration/commands/xmlupload/test_resource_creation.py
new file mode 100644
index 000000000..4d0cddd7d
--- /dev/null
+++ b/test/integration/commands/xmlupload/test_resource_creation.py
@@ -0,0 +1,413 @@
+from copy import deepcopy
+from dataclasses import dataclass
+from unittest.mock import Mock
+
+import pytest
+from lxml import etree
+
+from dsp_tools.commands.xmlupload import xmlupload
+from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
+from dsp_tools.commands.xmlupload.models.sipi import Sipi
+from dsp_tools.commands.xmlupload.models.upload_state import UploadState
+from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource
+from dsp_tools.commands.xmlupload.project_client import ProjectInfo
+from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash
+from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem
+from dsp_tools.commands.xmlupload.stash.stash_models import Stash
+from dsp_tools.commands.xmlupload.upload_config import UploadConfig
+from dsp_tools.models.exceptions import PermanentTimeOutError
+from dsp_tools.models.exceptions import XmlUploadInterruptedError
+from dsp_tools.utils.connection import Connection
+from dsp_tools.utils.connection_live import ConnectionLive
+
+
+class ListClientMock:
+ def get_list_node_id_to_iri_lookup(self) -> dict[str, str]:
+ return {}
+
+
+@dataclass
+class ProjectClientStub:
+ """Stub class for ProjectClient."""
+
+ con: Connection
+ shortcode: str
+ project_info: ProjectInfo | None
+
+ def get_project_iri(self) -> str:
+ return "https://admin.test.dasch.swiss/project/MsOaiQkcQ7-QPxsYBKckfQ"
+
+ def get_ontology_iris(self) -> list[str]:
+ raise NotImplementedError("get_project_iri not implemented")
+
+ def get_ontology_name_dict(self) -> dict[str, str]:
+ return {}
+
+ def get_ontology_iri_dict(self) -> dict[str, str]:
+ raise NotImplementedError("get_project_iri not implemented")
+
+
+def test_one_resource_without_links() -> None:
+ xml_strings = [
+ """
+
+ foo_1 text
+
+ """,
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ upload_state = UploadState(xml_resources, [], IriResolver(), None, UploadConfig(), {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [{"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+
+ assert len(con.post.call_args_list) == len(post_responses)
+ match con.post.call_args_list[0].kwargs:
+ case {
+ "route": "/v2/resources",
+ "data": {
+ "@type": "my_onto:foo_1_type",
+ "rdfs:label": "foo_1_label",
+ "knora-api:attachedToProject": {"@id": "https://admin.test.dasch.swiss/project/MsOaiQkcQ7-QPxsYBKckfQ"},
+ "@context": dict(),
+ "my_onto:hasSimpleText": [{"@type": "knora-api:TextValue", "knora-api:valueAsString": "foo_1 text"}],
+ },
+ }:
+ assert True
+ case _:
+ pytest.fail("POST request was not sent correctly")
+ assert not upload_state.pending_resources
+ assert not upload_state.failed_uploads
+ assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri"}
+ assert not upload_state.pending_stash
+
+
+def test_one_resource_with_link_to_existing_resource() -> None:
+ xml_strings = [
+ """
+
+ foo_2_id
+
+ """,
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ upload_state = UploadState(xml_resources, [], IriResolver({"foo_2_id": "foo_2_iri"}), None, UploadConfig(), {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [{"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+
+ assert len(con.post.call_args_list) == len(post_responses)
+ match con.post.call_args_list[0].kwargs:
+ case {
+ "route": "/v2/resources",
+ "data": {
+ "@type": "my_onto:foo_1_type",
+ "rdfs:label": "foo_1_label",
+ "knora-api:attachedToProject": {"@id": "https://admin.test.dasch.swiss/project/MsOaiQkcQ7-QPxsYBKckfQ"},
+ "@context": dict(),
+ "my_onto:hasCustomLinkValue": [
+ {"@type": "knora-api:LinkValue", "knora-api:linkValueHasTargetIri": {"@id": "foo_2_iri"}}
+ ],
+ },
+ }:
+ assert True
+ case _:
+ pytest.fail("POST request was not sent correctly")
+ assert not upload_state.pending_resources
+ assert not upload_state.failed_uploads
+ assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"}
+ assert not upload_state.pending_stash
+
+
+def test_2_resources_with_stash() -> None:
+ xml_strings = [
+ '',
+ '',
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ link_val_stash_dict = {
+ "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")],
+ "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")],
+ }
+ stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None)
+ upload_state = UploadState(xml_resources, [], IriResolver(), deepcopy(stash), UploadConfig(), {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [
+ {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"},
+ {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"},
+ {}, # uploading a stash doesn't rely on a certain response
+ {}, # uploading a stash doesn't rely on a certain response
+ ]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+
+ assert len(con.post.call_args_list) == len(post_responses)
+ match con.post.call_args_list[2].kwargs:
+ case {
+ "route": "/v2/values",
+ "data": {
+ "@type": "my_onto:foo_1_type",
+ "@id": "foo_1_iri",
+ "@context": dict(),
+ "my_onto:hasCustomLinkValue": {
+ "@type": "knora-api:LinkValue",
+ "knora-api:linkValueHasTargetIri": {"@id": "foo_2_iri"},
+ },
+ },
+ }:
+ assert True
+ case _:
+ pytest.fail("POST request was not sent correctly")
+ assert not upload_state.pending_resources
+ assert not upload_state.failed_uploads
+ assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"}
+ assert not upload_state.pending_stash or upload_state.pending_stash.is_empty()
+
+
+def test_2_resources_with_stash_interrupted_by_timeout() -> None:
+ _2_resources_with_stash_interrupted_by_error(PermanentTimeOutError(""), "PermanentTimeOutError")
+
+
+def test_2_resources_with_stash_interrupted_by_keyboard() -> None:
+ _2_resources_with_stash_interrupted_by_error(KeyboardInterrupt(), "KeyboardInterrupt")
+
+
+def _2_resources_with_stash_interrupted_by_error(err_to_interrupt_with: BaseException, err_as_str: str) -> None:
+ xml_strings = [
+ '',
+ '',
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ link_val_stash_dict = {
+ "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")],
+ "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")],
+ }
+ stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None)
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), UploadConfig(), {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [
+ {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"},
+ err_to_interrupt_with,
+ ]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+ xmlupload._handle_upload_error = Mock()
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+
+ assert len(con.post.call_args_list) == len(post_responses)
+ err_msg = (
+ f"There was a {err_as_str} while trying to create resource 'foo_2_id'.\n"
+ "It is unclear if the resource 'foo_2_id' was created successfully or not.\n"
+ "Please check manually in the DSP-APP or DB.\n"
+ "In case of successful creation, call 'resume-xmlupload' with the flag "
+ "'--skip-first-resource' to prevent duplication.\n"
+ "If not, a normal 'resume-xmlupload' can be started."
+ )
+ upload_state_expected = UploadState(
+ xml_resources[1:], [], IriResolver({"foo_1_id": "foo_1_iri"}), stash, UploadConfig(), {}
+ )
+ xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected)
+
+
+def test_5_resources_with_stash_and_interrupt_after_2() -> None:
+ xml_strings = [
+ '',
+ '',
+ '',
+ '',
+ '',
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ link_val_stash_dict = {
+ "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")],
+ "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")],
+ }
+ stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None)
+ upload_config = UploadConfig(interrupt_after=2)
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [
+ {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"},
+ {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"},
+ {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"},
+ {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"},
+ {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"},
+ {}, # uploading a stash doesn't rely on a certain response
+ {}, # uploading a stash doesn't rely on a certain response
+ ]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+ xmlupload._handle_upload_error = Mock()
+ err_msg = "Interrupted: Maximum number of resources was reached (2)"
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ iri_resolver_expected = IriResolver({"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"})
+ upload_state_expected = UploadState(xml_resources[2:], [], iri_resolver_expected, stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected)
+
+ xmlupload._handle_upload_error = Mock()
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ iri_resolver_expected.lookup.update({"foo_3_id": "foo_3_iri", "foo_4_id": "foo_4_iri"})
+ upload_state_expected = UploadState(xml_resources[4:], [], iri_resolver_expected, stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected)
+
+ xmlupload._handle_upload_error = Mock()
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ iri_resolver_expected.lookup.update({"foo_5_id": "foo_5_iri"})
+ empty_stash = Stash(standoff_stash=None, link_value_stash=LinkValueStash({}))
+ upload_state_expected = UploadState([], [], iri_resolver_expected, empty_stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_not_called()
+ assert upload_state == upload_state_expected
+
+
+def test_6_resources_with_stash_and_interrupt_after_2() -> None:
+ xml_strings = [
+ '',
+ '',
+ '',
+ '',
+ '',
+ '',
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ link_val_stash_dict = {
+ "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")],
+ "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")],
+ }
+ stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None)
+ upload_config = UploadConfig(interrupt_after=2)
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [
+ {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"},
+ {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"},
+ {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"},
+ {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"},
+ {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"},
+ {"@id": "foo_6_iri", "rdfs:label": "foo_6_label"},
+ {}, # uploading a stash doesn't rely on a certain response
+ {}, # uploading a stash doesn't rely on a certain response
+ ]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+ xmlupload._handle_upload_error = Mock()
+ err_msg = "Interrupted: Maximum number of resources was reached (2)"
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ iri_resolver_expected = IriResolver({"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"})
+ upload_state_expected = UploadState(xml_resources[2:], [], iri_resolver_expected, stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected)
+
+ xmlupload._handle_upload_error = Mock()
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ iri_resolver_expected.lookup.update({"foo_3_id": "foo_3_iri", "foo_4_id": "foo_4_iri"})
+ upload_state_expected = UploadState(xml_resources[4:], [], iri_resolver_expected, stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected)
+
+ xmlupload._handle_upload_error = Mock()
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ iri_resolver_expected.lookup.update({"foo_5_id": "foo_5_iri", "foo_6_id": "foo_6_iri"})
+ upload_state_expected = UploadState([], [], iri_resolver_expected, stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected)
+
+ xmlupload._handle_upload_error = Mock()
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ empty_stash = Stash(standoff_stash=None, link_value_stash=LinkValueStash({}))
+ upload_state_expected = UploadState([], [], iri_resolver_expected, empty_stash, upload_config, {})
+ xmlupload._handle_upload_error.assert_not_called()
+ assert upload_state == upload_state_expected
+
+
+def test_logging(caplog: pytest.LogCaptureFixture) -> None:
+ xml_strings = [
+ '',
+ '',
+ '',
+ '',
+ '',
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ link_val_stash_dict = {
+ "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")],
+ "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")],
+ }
+ stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None)
+ upload_config = UploadConfig(interrupt_after=2)
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [
+ {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"},
+ {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"},
+ {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"},
+ {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"},
+ {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"},
+ {}, # uploading a stash doesn't rely on a certain response
+ {}, # uploading a stash doesn't rely on a certain response
+ ]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+ xmlupload._handle_upload_error = Mock()
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ assert caplog.records[1].message == "Created resource 1/5: 'foo_1_label' (ID: 'foo_1_id', IRI: 'foo_1_iri')"
+ assert caplog.records[3].message == "Created resource 2/5: 'foo_2_label' (ID: 'foo_2_id', IRI: 'foo_2_iri')"
+ caplog.clear()
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ assert caplog.records[1].message == "Created resource 3/5: 'foo_3_label' (ID: 'foo_3_id', IRI: 'foo_3_iri')"
+ assert caplog.records[3].message == "Created resource 4/5: 'foo_4_label' (ID: 'foo_4_id', IRI: 'foo_4_iri')"
+ caplog.clear()
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ assert caplog.records[1].message == "Created resource 5/5: 'foo_5_label' (ID: 'foo_5_id', IRI: 'foo_5_iri')"
+ assert caplog.records[3].message == " Upload resptrs of resource 'foo_1_id'..."
+ assert caplog.records[5].message == " Upload resptrs of resource 'foo_2_id'..."
+ caplog.clear()
+
+
+def test_post_requests() -> None:
+ xml_strings = [
+ '',
+ '',
+ '',
+ '',
+ '',
+ '',
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ link_val_stash_dict = {
+ "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")],
+ "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")],
+ }
+ stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None)
+ upload_config = UploadConfig(interrupt_after=2)
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {})
+ con = Mock(spec_set=ConnectionLive)
+ post_responses = [
+ {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"},
+ {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"},
+ {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"},
+ {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"},
+ {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"},
+ {"@id": "foo_6_iri", "rdfs:label": "foo_6_label"},
+ {}, # uploading a stash doesn't rely on a certain response
+ {}, # uploading a stash doesn't rely on a certain response
+ ]
+ con.post = Mock(side_effect=post_responses)
+ project_client = ProjectClientStub(con, "1234", None)
+ xmlupload._handle_upload_error = Mock()
+
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock())
+ assert len(con.post.call_args_list) == len(post_responses)
diff --git a/test/integration/commands/xmlupload/test_upload_state.py b/test/integration/commands/xmlupload/test_upload_state.py
index 50af8df50..a9d4055c0 100644
--- a/test/integration/commands/xmlupload/test_upload_state.py
+++ b/test/integration/commands/xmlupload/test_upload_state.py
@@ -3,6 +3,7 @@
from lxml import etree
+from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
from dsp_tools.commands.xmlupload.models.upload_state import UploadState
from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource
from dsp_tools.commands.xmlupload.upload_config import DiagnosticsConfig
@@ -23,7 +24,7 @@ def test_save_upload_state(tmp_path: Path) -> None:
upload_state = UploadState(
pending_resources=[XMLResource(etree.fromstring(resource_str), default_ontology="test")],
failed_uploads=[],
- iri_resolver_lookup={"foo": "bar"},
+ iri_resolver=IriResolver({"foo": "bar"}),
pending_stash=None,
config=config,
permissions_lookup={},
@@ -34,7 +35,7 @@ def test_save_upload_state(tmp_path: Path) -> None:
assert msg == f"Saved the current upload state to {save_location}.\n"
assert len(upload_state.pending_resources) == len(saved_state.pending_resources)
assert [r.res_id for r in upload_state.pending_resources] == [r.res_id for r in saved_state.pending_resources]
- assert upload_state.iri_resolver_lookup == saved_state.iri_resolver_lookup
+ assert upload_state.iri_resolver.lookup == saved_state.iri_resolver.lookup
assert upload_state.pending_stash == saved_state.pending_stash
assert upload_state.config == saved_state.config
assert upload_state.permissions_lookup == saved_state.permissions_lookup
diff --git a/test/unittests/commands/xmlupload/test_resource_creation.py b/test/unittests/commands/xmlupload/test_resource_creation.py
new file mode 100644
index 000000000..a11b4d362
--- /dev/null
+++ b/test/unittests/commands/xmlupload/test_resource_creation.py
@@ -0,0 +1,53 @@
+from lxml import etree
+
+from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
+from dsp_tools.commands.xmlupload.models.upload_state import UploadState
+from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource
+from dsp_tools.commands.xmlupload.upload_config import UploadConfig
+from dsp_tools.commands.xmlupload.xmlupload import _tidy_up_resource_creation_idempotent
+
+
+def test_idempotency_on_success() -> None:
+ xml_strings = [
+ """
+
+ foo_1 text
+
+ """,
+ """
+
+ foo_2 text
+
+ """,
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), None, UploadConfig(), {})
+ for _ in range(3):
+ _tidy_up_resource_creation_idempotent(upload_state, "foo_1_iri", xml_resources[0])
+ assert upload_state.pending_resources == xml_resources[1:]
+ assert upload_state.failed_uploads == []
+ assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri"}
+ assert not upload_state.pending_stash
+
+
+def test_idempotency_on_failure() -> None:
+ xml_strings = [
+ """
+
+ foo_1 text
+
+ """,
+ """
+
+ foo_2 text
+
+ """,
+ ]
+ xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings]
+ upload_state = UploadState(xml_resources.copy(), [], IriResolver(), None, UploadConfig(), {})
+ for _ in range(3):
+ _tidy_up_resource_creation_idempotent(upload_state, None, xml_resources[0])
+ assert upload_state.pending_resources == xml_resources[1:]
+ assert upload_state.failed_uploads == ["foo_1_id"]
+ assert upload_state.iri_resolver.lookup == {}
+ assert not upload_state.pending_stash