diff --git a/.github/workflows/tests-on-push.yml b/.github/workflows/tests-on-push.yml index b7e77dde5..2e38e89a7 100644 --- a/.github/workflows/tests-on-push.yml +++ b/.github/workflows/tests-on-push.yml @@ -108,4 +108,6 @@ jobs: - name: Install python & poetry, build & install wheel, install pytest uses: ./.github/actions/setup-from-wheel - name: distribution tests - run: .dist-test-venv/bin/pytest test/distribution/ + run: .dist-test-venv/bin/pytest --noconftest test/distribution/ + # Reason for the --noconftest flag: + # test/conftest.py configures logging, but loguru should not be installed for the distribution tests diff --git a/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py b/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py index 6b5f008f4..6729fc460 100644 --- a/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +++ b/src/dsp_tools/commands/resume_xmlupload/resume_xmlupload.py @@ -1,12 +1,9 @@ import pickle import sys -from copy import deepcopy -from dataclasses import replace from loguru import logger from termcolor import colored -from dsp_tools.commands.xmlupload.iri_resolver import IriResolver from dsp_tools.commands.xmlupload.list_client import ListClient from dsp_tools.commands.xmlupload.list_client import ListClientLive from dsp_tools.commands.xmlupload.models.sipi import Sipi @@ -36,33 +33,9 @@ def resume_xmlupload(server: str, user: str, password: str, sipi: str, skip_firs """ upload_state = _read_upload_state_from_disk(server) if skip_first_resource: - if len(upload_state.pending_resources) > 0: - new_pending = deepcopy(upload_state.pending_resources) - new_pending.pop(0) - upload_state = replace(upload_state, pending_resources=new_pending) - else: - msg = ( - "The list of pending resources is empty.\n" - "It is not yet possible to skip the first item of the stashed properties.\n" - "Do you want to continue with the upload of the stashed properties anyway? 
[y/n]" - ) - resp = None - while resp not in ["y", "n"]: - resp = input(colored(msg, color="red")) - if resp == "n": - sys.exit(1) - - previous_successful = len(upload_state.iri_resolver_lookup) - previous_failed = len(upload_state.failed_uploads) - previous_total = previous_successful + previous_failed - msg = ( - f"Resuming upload for project {upload_state.config.shortcode} on server {server}. " - f"Number of resources uploaded until now: {previous_total}" - ) - if previous_failed: - msg += f" ({previous_failed} of them failed)" - logger.info(msg) - print("\n==========================\n" + msg + "\n==========================\n") + _skip_first_resource(upload_state) + + _print_and_log(upload_state, server) con = ConnectionLive(server) con.login(user, password) @@ -72,21 +45,15 @@ def resume_xmlupload(server: str, user: str, password: str, sipi: str, skip_firs project_client: ProjectClient = ProjectClientLive(con, upload_state.config.shortcode) list_client: ListClient = ListClientLive(con, project_client.get_project_iri()) - iri_resolver, failed_uploads, nonapplied_stash = upload_resources( - resources=upload_state.pending_resources, - failed_uploads=upload_state.failed_uploads, + upload_resources( + upload_state=upload_state, imgdir=".", sipi_server=sipi_server, - permissions_lookup=upload_state.permissions_lookup, - con=con, - stash=upload_state.pending_stash, - config=upload_state.config, project_client=project_client, list_client=list_client, - iri_resolver=IriResolver(upload_state.iri_resolver_lookup), ) - return cleanup_upload(iri_resolver, upload_state.config, failed_uploads, nonapplied_stash) + return cleanup_upload(upload_state) def _read_upload_state_from_disk(server: str) -> UploadState: @@ -94,3 +61,33 @@ def _read_upload_state_from_disk(server: str) -> UploadState: with open(save_location, "rb") as f: saved_state: UploadState = pickle.load(f) # noqa: S301 (deserialization of untrusted data) return saved_state + + +def _skip_first_resource(upload_state: 
UploadState) -> None: + if len(upload_state.pending_resources) > 0: + upload_state.pending_resources.pop(0) + else: + msg = ( + "The list of pending resources is empty.\n" + "It is not yet possible to skip the first item of the stashed properties.\n" + "Do you want to continue with the upload of the stashed properties anyway? [y/n]" + ) + resp = None + while resp not in ["y", "n"]: + resp = input(colored(msg, color="red")) + if resp == "n": + sys.exit(1) + + +def _print_and_log(upload_state: UploadState, server: str) -> None: + previous_successful = len(upload_state.iri_resolver.lookup) + previous_failed = len(upload_state.failed_uploads) + previous_total = previous_successful + previous_failed + msg = ( + f"Resuming upload for project {upload_state.config.shortcode} on server {server}. " + f"Number of resources uploaded until now: {previous_total}" + ) + if previous_failed: + msg += f" ({previous_failed} of them failed)" + logger.info(msg) + print("\n==========================\n" + msg + "\n==========================\n") diff --git a/src/dsp_tools/commands/xmlupload/models/upload_state.py b/src/dsp_tools/commands/xmlupload/models/upload_state.py index 73b212c0c..a4c6b6b54 100644 --- a/src/dsp_tools/commands/xmlupload/models/upload_state.py +++ b/src/dsp_tools/commands/xmlupload/models/upload_state.py @@ -1,12 +1,13 @@ from dataclasses import dataclass +from dsp_tools.commands.xmlupload.iri_resolver import IriResolver from dsp_tools.commands.xmlupload.models.permission import Permissions from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource from dsp_tools.commands.xmlupload.stash.stash_models import Stash from dsp_tools.commands.xmlupload.upload_config import UploadConfig -@dataclass(frozen=True) +@dataclass class UploadState: """ Save the state of an xmlupload, so that after an interruption, it can be resumed. 
@@ -14,7 +15,7 @@ class UploadState: pending_resources: list[XMLResource] failed_uploads: list[str] - iri_resolver_lookup: dict[str, str] + iri_resolver: IriResolver pending_stash: Stash | None config: UploadConfig permissions_lookup: dict[str, Permissions] diff --git a/src/dsp_tools/commands/xmlupload/project_client.py b/src/dsp_tools/commands/xmlupload/project_client.py index 74854efd3..f8e70b8e3 100644 --- a/src/dsp_tools/commands/xmlupload/project_client.py +++ b/src/dsp_tools/commands/xmlupload/project_client.py @@ -19,6 +19,10 @@ class ProjectInfo: class ProjectClient(Protocol): """Interface (protocol) for project-related requests to the DSP-API.""" + con: Connection + shortcode: str + project_info: ProjectInfo | None + def get_project_iri(self) -> str: """Get the IRI of the project to which the data is being uploaded.""" diff --git a/src/dsp_tools/commands/xmlupload/resource_create_client.py b/src/dsp_tools/commands/xmlupload/resource_create_client.py index 3dc543c84..9390d4bd9 100644 --- a/src/dsp_tools/commands/xmlupload/resource_create_client.py +++ b/src/dsp_tools/commands/xmlupload/resource_create_client.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any from typing import assert_never +from typing import cast from loguru import logger @@ -37,17 +38,15 @@ def create_resource( self, resource: XMLResource, bitstream_information: BitstreamInfo | None, - ) -> tuple[str, str]: - """Creates a resource on the DSP server.""" + ) -> str: + """Creates a resource on the DSP server, and returns its IRI""" logger.info( f"Attempting to create resource {resource.res_id} (label: {resource.label}, iri: {resource.iri})..." 
) resource_dict = self._make_resource_with_values(resource, bitstream_information) headers = {"X-Asset-Ingested": "true"} if self.media_previously_ingested else None res = self.con.post(route="/v2/resources", data=resource_dict, headers=headers) - iri = res["@id"] - label = res["rdfs:label"] - return iri, label + return cast(str, res["@id"]) def _make_resource_with_values( self, diff --git a/src/dsp_tools/commands/xmlupload/stash/stash_models.py b/src/dsp_tools/commands/xmlupload/stash/stash_models.py index ae1a57aff..90e1b4f5c 100644 --- a/src/dsp_tools/commands/xmlupload/stash/stash_models.py +++ b/src/dsp_tools/commands/xmlupload/stash/stash_models.py @@ -106,3 +106,9 @@ def make(standoff_stash: StandoffStash | None, link_value_stash: LinkValueStash if standoff_stash or link_value_stash: return Stash(standoff_stash, link_value_stash) return None + + def is_empty(self) -> bool: + """Check if there are no stashed items left in this stash""" + standoff = not self.standoff_stash or not self.standoff_stash.res_2_stash_items + link = not self.link_value_stash or not self.link_value_stash.res_2_stash_items + return standoff and link diff --git a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py index 43b3b9068..ad3c2ec7f 100644 --- a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +++ b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py @@ -2,40 +2,39 @@ from datetime import datetime from typing import Any +from typing import cast from loguru import logger -from dsp_tools.commands.xmlupload.iri_resolver import IriResolver +from dsp_tools.commands.xmlupload.models.upload_state import UploadState from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem +from dsp_tools.commands.xmlupload.stash.stash_models import Stash from 
dsp_tools.models.exceptions import BaseError from dsp_tools.utils.connection import Connection def upload_stashed_resptr_props( - iri_resolver: IriResolver, + upload_state: UploadState, con: Connection, - stashed_resptr_props: LinkValueStash, context: dict[str, str], -) -> LinkValueStash | None: +) -> None: """ After all resources are uploaded, the stashed resptr props must be applied to their resources in DSP. + The upload state is updated accordingly, as a side effect. Args: - iri_resolver: resolver with a mapping of ids from the XML file to IRIs in DSP + upload_state: the current state of the upload con: connection to DSP - stashed_resptr_props: all resptr props that have been stashed context: the JSON-LD context of the resource - - Returns: - nonapplied_resptr_props: the resptr props that could not be uploaded """ print(f"{datetime.now()}: Upload the stashed resptrs...") logger.info("Upload the stashed resptrs...") - not_uploaded: list[LinkValueStashItem] = [] - for res_id, stash_items in stashed_resptr_props.res_2_stash_items.copy().items(): - res_iri = iri_resolver.get(res_id) + upload_state.pending_stash = cast(Stash, upload_state.pending_stash) + link_value_stash = cast(LinkValueStash, upload_state.pending_stash.link_value_stash) + for res_id, stash_items in link_value_stash.res_2_stash_items.copy().items(): + res_iri = upload_state.iri_resolver.get(res_id) if not res_iri: # resource could not be uploaded to DSP, so the stash cannot be uploaded either # no action necessary: this resource will remain in nonapplied_resptr_props, @@ -44,16 +43,13 @@ def upload_stashed_resptr_props( print(f"{datetime.now()}: Upload resptrs of resource '{res_id}'...") logger.info(f" Upload resptrs of resource '{res_id}'...") for stash_item in stash_items: - target_iri = iri_resolver.get(stash_item.target_id) + target_iri = upload_state.iri_resolver.get(stash_item.target_id) if not target_iri: continue if _upload_stash_item(stash_item, res_iri, target_iri, con, context): - 
stashed_resptr_props.res_2_stash_items[res_id].remove(stash_item) - else: - not_uploaded.append(stash_item) - if not stashed_resptr_props.res_2_stash_items[res_id]: - del stashed_resptr_props.res_2_stash_items[res_id] - return LinkValueStash.make(not_uploaded) + link_value_stash.res_2_stash_items[res_id].remove(stash_item) + if not link_value_stash.res_2_stash_items[res_id]: + del link_value_stash.res_2_stash_items[res_id] def _upload_stash_item( diff --git a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py index 9dbb02a29..1cc507052 100644 --- a/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +++ b/src/dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py @@ -2,14 +2,17 @@ from datetime import datetime from typing import Any +from typing import cast from urllib.parse import quote_plus from loguru import logger from dsp_tools.commands.xmlupload.iri_resolver import IriResolver from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue +from dsp_tools.commands.xmlupload.models.upload_state import UploadState from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem +from dsp_tools.commands.xmlupload.stash.stash_models import Stash from dsp_tools.models.exceptions import BaseError from dsp_tools.utils.connection import Connection @@ -93,28 +96,22 @@ def _create_XMLResource_json_object_to_update( return jsonobj -def upload_stashed_xml_texts( - iri_resolver: IriResolver, - con: Connection, - stashed_xml_texts: StandoffStash, -) -> StandoffStash | None: +def upload_stashed_xml_texts(upload_state: UploadState, con: Connection) -> None: """ After all resources are uploaded, the stashed xml texts must be applied to their resources in DSP. + The upload state is updated accordingly, as a side effect. 
Args: - iri_resolver: resolver to map ids from the XML file to IRIs in DSP + upload_state: the current state of the upload con: connection to DSP - stashed_xml_texts: all xml texts that have been stashed - - Returns: - the xml texts that could not be uploaded """ print(f"{datetime.now()}: Upload the stashed XML texts...") logger.info("Upload the stashed XML texts...") - not_uploaded: list[StandoffStashItem] = [] - for res_id, stash_items in stashed_xml_texts.res_2_stash_items.copy().items(): - res_iri = iri_resolver.get(res_id) + upload_state.pending_stash = cast(Stash, upload_state.pending_stash) + standoff_stash = cast(StandoffStash, upload_state.pending_stash.standoff_stash) + for res_id, stash_items in standoff_stash.res_2_stash_items.copy().items(): + res_iri = upload_state.iri_resolver.get(res_id) if not res_iri: # resource could not be uploaded to DSP, so the stash cannot be uploaded either # no action necessary: this resource will remain in the list of not uploaded stash items, @@ -131,25 +128,20 @@ def upload_stashed_xml_texts( for stash_item in stash_items: value_iri = _get_value_iri(stash_item.prop_name, resource_in_triplestore, stash_item.uuid) if not value_iri: - not_uploaded.append(stash_item) continue - success = _upload_stash_item( + if _upload_stash_item( stash_item=stash_item, res_iri=res_iri, res_type=stash_item.res_type, res_id=res_id, value_iri=value_iri, - iri_resolver=iri_resolver, + iri_resolver=upload_state.iri_resolver, con=con, context=context, - ) - if success: - stashed_xml_texts.res_2_stash_items[res_id].remove(stash_item) - else: - not_uploaded.append(stash_item) - if not stashed_xml_texts.res_2_stash_items[res_id]: - stashed_xml_texts.res_2_stash_items.pop(res_id) - return StandoffStash.make(not_uploaded) + ): + standoff_stash.res_2_stash_items[res_id].remove(stash_item) + if not standoff_stash.res_2_stash_items[res_id]: + standoff_stash.res_2_stash_items.pop(res_id) def _get_value_iri( diff --git 
a/src/dsp_tools/commands/xmlupload/xmlupload.py b/src/dsp_tools/commands/xmlupload/xmlupload.py index d7a251d80..048547636 100644 --- a/src/dsp_tools/commands/xmlupload/xmlupload.py +++ b/src/dsp_tools/commands/xmlupload/xmlupload.py @@ -2,6 +2,7 @@ import pickle import sys +import warnings from datetime import datetime from pathlib import Path from typing import Any @@ -32,6 +33,7 @@ from dsp_tools.commands.xmlupload.stash.upload_stashed_xml_texts import upload_stashed_xml_texts from dsp_tools.commands.xmlupload.upload_config import UploadConfig from dsp_tools.commands.xmlupload.write_diagnostic_info import write_id2iri_mapping +from dsp_tools.models.custom_warnings import DspToolsUserWarning from dsp_tools.models.exceptions import BaseError from dsp_tools.models.exceptions import PermanentTimeOutError from dsp_tools.models.exceptions import UserError @@ -105,60 +107,51 @@ def xmlupload( project_client: ProjectClient = ProjectClientLive(con, config.shortcode) list_client: ListClient = ListClientLive(con, project_client.get_project_iri()) - iri_resolver = IriResolver() + upload_state = UploadState(resources, [], IriResolver(), stash, config, permissions_lookup) - iri_resolver, failed_uploads, nonapplied_stash = upload_resources( - resources=resources, - failed_uploads=[], + upload_resources( + upload_state=upload_state, imgdir=imgdir, sipi_server=sipi_server, - permissions_lookup=permissions_lookup, - con=con, - stash=stash, - config=config, project_client=project_client, list_client=list_client, - iri_resolver=iri_resolver, ) - return cleanup_upload(iri_resolver, config, failed_uploads, nonapplied_stash) + return cleanup_upload(upload_state) -def cleanup_upload( - iri_resolver: IriResolver, - config: UploadConfig, - failed_uploads: list[str], - nonapplied_stash: Stash | None, -) -> bool: +def cleanup_upload(upload_state: UploadState) -> bool: """ Write the id2iri mapping to a file and print a message to the console. 
Args: - iri_resolver: mapping from internal IDs to IRIs - config: the upload configuration - failed_uploads: resources that caused an error when uploading to DSP - nonapplied_stash: the stash items that could not be reapplied + upload_state: the current state of the upload Returns: - success status (deduced from failed_uploads) + success status (deduced from failed_uploads and non-applied stash) """ - write_id2iri_mapping(iri_resolver.lookup, config.diagnostics) - if not failed_uploads and not nonapplied_stash: + write_id2iri_mapping(upload_state.iri_resolver.lookup, upload_state.config.diagnostics) + has_stash_failed = upload_state.pending_stash and not upload_state.pending_stash.is_empty() + if not upload_state.failed_uploads and not has_stash_failed: success = True print(f"{datetime.now()}: All resources have successfully been uploaded.") logger.info("All resources have successfully been uploaded.") else: success = False - if failed_uploads: - print(f"\n{datetime.now()}: WARNING: Could not upload the following resources: {failed_uploads}\n") + if upload_state.failed_uploads: + res_msg = f"Could not upload the following resources: {upload_state.failed_uploads}" + print(f"\n{datetime.now()}: WARNING: {res_msg}\n") print(f"For more information, see the log file: {logger_savepath}\n") - logger.warning(f"Could not upload the following resources: {failed_uploads}") - if nonapplied_stash: - print(f"\n{datetime.now()}: WARNING: Could not reapply the following stash items: {nonapplied_stash}\n") + logger.warning(res_msg) + if has_stash_failed: + stash_msg = f"Could not reapply the following stash items: {upload_state.pending_stash}" + print(f"\n{datetime.now()}: WARNING: {stash_msg}\n") print(f"For more information, see the log file: {logger_savepath}\n") - logger.warning(f"Could not reapply the following stash items: {nonapplied_stash}") + logger.warning(stash_msg) + msg = _save_upload_state(upload_state) + print(msg) - 
config.diagnostics.save_location.unlink(missing_ok=True) + upload_state.config.diagnostics.save_location.unlink(missing_ok=True) return success @@ -185,90 +178,34 @@ def _prepare_upload( def upload_resources( - resources: list[XMLResource], - failed_uploads: list[str], + upload_state: UploadState, imgdir: str, sipi_server: Sipi, - permissions_lookup: dict[str, Permissions], - con: Connection, - stash: Stash | None, - config: UploadConfig, project_client: ProjectClient, list_client: ListClient, - iri_resolver: IriResolver, -) -> tuple[IriResolver, list[str], Stash | None]: +) -> None: """ Actual upload of all resources to DSP. Args: - resources: list of XMLResources to upload to DSP - failed_uploads: resources that caused an error in a previous upload + upload_state: the current state of the upload imgdir: folder containing the multimedia files sipi_server: Sipi instance - permissions_lookup: dictionary that contains the permission name as string and the corresponding Python object - con: connection to the DSP server - stash: an object that contains all stashed links that could not be reapplied to their resources - config: the upload configuration project_client: a client for HTTP communication with the DSP-API list_client: a client for HTTP communication with the DSP-API - iri_resolver: mapping from internal IDs to IRIs - - Returns: - the id2iri mapping of the uploaded resources, - a list of resources that could not be uploaded, - and the stash items that could not be reapplied. 
""" try: - iri_resolver, failed_uploads = _upload_resources( - resources=resources, - failed_uploads=failed_uploads, + _upload_resources( + upload_state=upload_state, imgdir=imgdir, sipi_server=sipi_server, - permissions_lookup=permissions_lookup, - con=con, - config=config, project_client=project_client, list_client=list_client, - iri_resolver=iri_resolver, - ) - except BaseException as err: # noqa: BLE001 (blind-except) - # The forseeable errors are already handled by failed_uploads - # Here we catch the unforseeable exceptions, incl. keyboard interrupt. - _handle_upload_error( - err=err, - iri_resolver=iri_resolver, - pending_resources=resources, - failed_uploads=failed_uploads, - pending_stash=stash, - config=config, - permissions_lookup=permissions_lookup, - ) - - nonapplied_stash = None - try: - nonapplied_stash = ( - _upload_stash( - stash=stash, - iri_resolver=iri_resolver, - con=con, - project_client=project_client, - ) - if stash - else None - ) - except BaseException as err: # noqa: BLE001 (blind-except) - # The forseeable errors are already handled by failed_uploads and nonapplied_stash. - # Here we catch the unforseeable exceptions, incl. keyboard interrupt. 
- _handle_upload_error( - err=err, - iri_resolver=iri_resolver, - pending_resources=resources, - failed_uploads=failed_uploads, - pending_stash=stash, - config=config, - permissions_lookup=permissions_lookup, ) - return iri_resolver, failed_uploads, nonapplied_stash + if upload_state.pending_stash: + _upload_stash(upload_state, project_client) + except XmlUploadInterruptedError as err: + _handle_upload_error(err, upload_state) def _get_data_from_xml( @@ -284,30 +221,14 @@ def _get_data_from_xml( def _upload_stash( - stash: Stash, - iri_resolver: IriResolver, - con: Connection, + upload_state: UploadState, project_client: ProjectClient, -) -> Stash | None: - if stash.standoff_stash: - nonapplied_standoff = upload_stashed_xml_texts( - iri_resolver=iri_resolver, - con=con, - stashed_xml_texts=stash.standoff_stash, - ) - else: - nonapplied_standoff = None +) -> None: + if upload_state.pending_stash and upload_state.pending_stash.standoff_stash: + upload_stashed_xml_texts(upload_state, project_client.con) context = get_json_ld_context_for_project(project_client.get_ontology_name_dict()) - if stash.link_value_stash: - nonapplied_resptr_props = upload_stashed_resptr_props( - iri_resolver=iri_resolver, - con=con, - stashed_resptr_props=stash.link_value_stash, - context=context, - ) - else: - nonapplied_resptr_props = None - return Stash.make(nonapplied_standoff, nonapplied_resptr_props) + if upload_state.pending_stash and upload_state.pending_stash.link_value_stash: + upload_stashed_resptr_props(upload_state, project_client.con, context) def _get_project_context_from_server(connection: Connection) -> ProjectContext: @@ -343,129 +264,137 @@ def _extract_resources_from_xml(root: etree._Element, default_ontology: str) -> def _upload_resources( - resources: list[XMLResource], - failed_uploads: list[str], + upload_state: UploadState, imgdir: str, sipi_server: Sipi, - permissions_lookup: dict[str, Permissions], - con: Connection, - config: UploadConfig, project_client: 
ProjectClient, list_client: ListClient, - iri_resolver: IriResolver, -) -> tuple[IriResolver, list[str]]: +) -> None: """ Iterates through all resources and tries to upload them to DSP. If a temporary exception occurs, the action is repeated until success, and if a permanent exception occurs, the resource is skipped. Args: - resources: list of XMLResources to upload to DSP - failed_uploads: resources that caused an error in a previous upload + upload_state: the current state of the upload imgdir: folder containing the multimedia files sipi_server: Sipi instance - permissions_lookup: maps permission strings to Permission objects - con: connection to DSP - config: the upload configuration project_client: a client for HTTP communication with the DSP-API list_client: a client for HTTP communication with the DSP-API - iri_resolver: mapping from internal IDs to IRIs Raises: BaseException: in case of an unhandled exception during resource creation XmlUploadInterruptedError: if the number of resources created is equal to the interrupt_after value - - Returns: - id2iri_mapping, failed_uploads """ project_iri = project_client.get_project_iri() json_ld_context = get_json_ld_context_for_project(project_client.get_ontology_name_dict()) listnode_lookup = list_client.get_list_node_id_to_iri_lookup() resource_create_client = ResourceCreateClient( - con=con, + con=project_client.con, project_iri=project_iri, - iri_resolver=iri_resolver, + iri_resolver=upload_state.iri_resolver, json_ld_context=json_ld_context, - permissions_lookup=permissions_lookup, + permissions_lookup=upload_state.permissions_lookup, listnode_lookup=listnode_lookup, - media_previously_ingested=config.media_previously_uploaded, + media_previously_ingested=upload_state.config.media_previously_uploaded, ) - total_res = len(resources) + len(iri_resolver.lookup) - previous_successful = len(iri_resolver.lookup) - previous_failed = len(failed_uploads) - previous_total = previous_successful + previous_failed - # if the 
interrupt_after value is not set, the upload will not be interrupted - interrupt_after = config.interrupt_after or total_res + 1 + for creation_attempts_of_this_round, resource in enumerate(upload_state.pending_resources.copy()): + _upload_one_resource( + upload_state=upload_state, + resource=resource, + imgdir=imgdir, + sipi_server=sipi_server, + resource_create_client=resource_create_client, + creation_attempts_of_this_round=creation_attempts_of_this_round, + ) + - for i, resource in enumerate(resources.copy()): - current_res = i + 1 + previous_total - if i >= interrupt_after: - raise XmlUploadInterruptedError(f"Interrupted: Maximum number of resources was reached ({interrupt_after})") +def _upload_one_resource( + upload_state: UploadState, + resource: XMLResource, + imgdir: str, + sipi_server: Sipi, + resource_create_client: ResourceCreateClient, + creation_attempts_of_this_round: int, +) -> None: + try: success, media_info = handle_media_info( - resource, config.media_previously_uploaded, sipi_server, imgdir, permissions_lookup + resource, + upload_state.config.media_previously_uploaded, + sipi_server, + imgdir, + upload_state.permissions_lookup, ) if not success: - failed_uploads.append(resource.res_id) - continue - - res = None - try: - res = _create_resource(resource, media_info, resource_create_client) - if res == (None, None): - # resource creation failed gracefully: register it as failed, then continue - failed_uploads.append(resource.res_id) - continue - else: - # resource creation succeeded: update the iri_resolver and remove the resource from the list - iri, label = res - _tidy_up_resource_creation(iri, label, iri_resolver, resource, current_res, total_res) # type: ignore[arg-type] - except PermanentTimeOutError: - msg = ( - f"There was a timeout while trying to create resource '{resource.res_id}'.\n" - f"It is unclear if the resource '{resource.res_id}' was created successfully or not.\n" - f"Please check manually in the DSP-APP or DB.\n" - f"In case 
of successful creation, call 'resume-xmlupload' with the flag " - f"'--skip-first-resource' to prevent duplication.\n" - f"If not, a normal 'resume-xmlupload' can be started." - ) - logger.error(msg) - raise XmlUploadInterruptedError(msg) - except BaseException as err: # noqa: BLE001 (blind-except) - if res and res[0]: - # creation succeeded, but during tidy up, a Keyboard Interrupt occurred. tidy up again before escalating - iri, label = res - _tidy_up_resource_creation(iri, label, iri_resolver, resource, current_res, total_res) - else: - # unhandled exception during resource creation - failed_uploads.append(resource.res_id) - raise err from None - finally: - resources.remove(resource) - - return iri_resolver, failed_uploads - - -def _tidy_up_resource_creation( - iri: str, - label: str, - iri_resolver: IriResolver, + upload_state.failed_uploads.append(resource.res_id) + return + except KeyboardInterrupt: + raise XmlUploadInterruptedError("KeyboardInterrupt during media file upload") from None + + try: + iri = _create_resource(resource, media_info, resource_create_client) + except (PermanentTimeOutError, KeyboardInterrupt) as err: + warnings.warn(DspToolsUserWarning(f"{type(err).__name__}: Tidying up, then exit...")) + msg = ( + f"There was a {type(err).__name__} while trying to create resource '{resource.res_id}'.\n" + f"It is unclear if the resource '{resource.res_id}' was created successfully or not.\n" + f"Please check manually in the DSP-APP or DB.\n" + f"In case of successful creation, call 'resume-xmlupload' with the flag " + f"'--skip-first-resource' to prevent duplication.\n" + f"If not, a normal 'resume-xmlupload' can be started." 
+ ) + logger.error(msg) + raise XmlUploadInterruptedError(msg) from None + + try: + _tidy_up_resource_creation_idempotent(upload_state, iri, resource) + _interrupt_if_indicated(upload_state, creation_attempts_of_this_round) + except KeyboardInterrupt: + warnings.warn(DspToolsUserWarning("KeyboardInterrupt: Tidying up, then exit...")) + _tidy_up_resource_creation_idempotent(upload_state, iri, resource) + raise XmlUploadInterruptedError("KeyboardInterrupt during tidy up") from None + + +def _interrupt_if_indicated(upload_state: UploadState, creation_attempts_of_this_round: int) -> None: + # if the interrupt_after value is not set, the upload will not be interrupted + interrupt_after = upload_state.config.interrupt_after or 999_999_999 + if creation_attempts_of_this_round + 1 >= interrupt_after: + msg = f"Interrupted: Maximum number of resources was reached ({upload_state.config.interrupt_after})" + raise XmlUploadInterruptedError(msg) + + +def _tidy_up_resource_creation_idempotent( + upload_state: UploadState, + iri: str | None, resource: XMLResource, - current_res: int, - total_res: int, ) -> None: - iri_resolver.update(resource.res_id, iri) - resource_designation = f"'{label}' (ID: '{resource.res_id}', IRI: '{iri}')" - print(f"{datetime.now()}: Created resource {current_res}/{total_res}: {resource_designation}") - logger.info(f"Created resource {current_res}/{total_res}: {resource_designation}") + previous_successful = len(upload_state.iri_resolver.lookup) + previous_failed = len(upload_state.failed_uploads) + upcoming = len(upload_state.pending_resources) + current_res = previous_successful + previous_failed + 1 + total_res = previous_successful + previous_failed + upcoming + if iri: + # resource creation succeeded: update the iri_resolver + upload_state.iri_resolver.lookup[resource.res_id] = iri + msg = f"Created resource {current_res}/{total_res}: '{resource.label}' (ID: '{resource.res_id}', IRI: '{iri}')" + print(f"{datetime.now()}: {msg}") + logger.info(msg) + 
else: # noqa: PLR5501 + # resource creation failed gracefully: register it as failed + if resource.res_id not in upload_state.failed_uploads: + upload_state.failed_uploads.append(resource.res_id) + + if resource in upload_state.pending_resources: + upload_state.pending_resources.remove(resource) def _create_resource( resource: XMLResource, bitstream_information: BitstreamInfo | None, resource_create_client: ResourceCreateClient, -) -> tuple[str, str] | tuple[None, None]: +) -> str | None: try: return resource_create_client.create_resource(resource, bitstream_information) except PermanentTimeOutError as err: @@ -484,18 +413,10 @@ def _create_resource( f"Property details:\n" + "\n".join([str(vars(prop)) for prop in resource.properties]) ) logger.exception(log_msg) - return None, None + return None -def _handle_upload_error( - err: BaseException, - iri_resolver: IriResolver, - pending_resources: list[XMLResource], - failed_uploads: list[str], - pending_stash: Stash | None, - config: UploadConfig, - permissions_lookup: dict[str, Permissions], -) -> None: +def _handle_upload_error(err: BaseException, upload_state: UploadState) -> None: """ In case the xmlupload must be interrupted, e.g. 
because of an error that could not be handled, @@ -508,12 +429,7 @@ def _handle_upload_error( Args: err: the error that was the cause of the abort - iri_resolver: a resolver for internal IDs to IRIs - pending_resources: resources that were not uploaded to DSP - failed_uploads: resources that caused an error when uploading to DSP - pending_stash: an object that contains all stashed links that could not yet be reapplied to their resources - config: the upload configuration - permissions_lookup: dictionary that contains the permission name as string and the corresponding Python object + upload_state: the current state of the upload """ if isinstance(err, XmlUploadInterruptedError): msg = "\n==========================================\n" + err.message + "\n" @@ -527,13 +443,10 @@ def _handle_upload_error( ) exit_code = 1 - upload_state = UploadState( - pending_resources, failed_uploads, iri_resolver.lookup, pending_stash, config, permissions_lookup - ) msg += _save_upload_state(upload_state) - if failed_uploads: - msg += f"Independently from this, there were some resources that could not be uploaded: {failed_uploads}\n" + if failed := upload_state.failed_uploads: + msg += f"Independently from this, there were some resources that could not be uploaded: {failed}\n" if exit_code == 1: logger.exception(msg) @@ -550,4 +463,5 @@ def _save_upload_state(upload_state: UploadState) -> str: save_location.touch(exist_ok=True) with open(save_location, "wb") as file: pickle.dump(upload_state, file) + logger.info(f"Saved the current upload state to {save_location}") return f"Saved the current upload state to {save_location}.\n" diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..36c4951ab --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,38 @@ +import logging +from typing import Iterator + +import pytest +from _pytest.logging import caplog as _caplog # noqa: F401 (imported but unused) +from loguru import logger + +from dsp_tools.utils.logger_config 
import logger_config + + +@pytest.fixture() +def caplog(_caplog: pytest.LogCaptureFixture) -> Iterator[pytest.LogCaptureFixture]: # noqa: F811 (redefinition) + """ + The caplog fixture that comes shipped with pytest does not support loguru. + This modified version can be used exactly like the builtin caplog fixture, + which is documented at https://docs.pytest.org/en/latest/how-to/logging.html#caplog-fixture. + Credits: https://www.youtube.com/watch?v=eFdVlyAGeZU + + Yields: + pytest.LogCaptureFixture: The modified caplog fixture. + """ + + class PropagateHandler(logging.Handler): + def emit(self, record: logging.LogRecord) -> None: + logging.getLogger(record.name).handle(record) + + handler_id = logger.add(sink=PropagateHandler(), format="{message}", level="DEBUG") + yield _caplog + logger.remove(handler_id) + + +def pytest_sessionstart() -> None: + """ + Called after the Session object has been created + and before performing collection and entering the run test loop. + See https://docs.pytest.org/en/latest/reference/reference.html#pytest.hookspec.pytest_sessionstart. + """ + logger_config() diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py deleted file mode 100644 index e28dc796b..000000000 --- a/test/e2e/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -from dsp_tools.utils.logger_config import logger_config - - -def pytest_sessionstart() -> None: - """ - Called after the Session object has been created - and before performing collection and entering the run test loop. - See https://docs.pytest.org/en/latest/reference/reference.html#pytest.hookspec.pytest_sessionstart. 
- """ - logger_config() diff --git a/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py b/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py index 8dddf9893..2f419fe6b 100644 --- a/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py +++ b/test/integration/commands/xmlupload/stash/test_upload_stash_with_mock.py @@ -6,11 +6,14 @@ from dsp_tools.commands.xmlupload.iri_resolver import IriResolver from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue +from dsp_tools.commands.xmlupload.models.upload_state import UploadState +from dsp_tools.commands.xmlupload.project_client import ProjectInfo from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem from dsp_tools.commands.xmlupload.stash.stash_models import Stash +from dsp_tools.commands.xmlupload.upload_config import UploadConfig from dsp_tools.commands.xmlupload.xmlupload import _upload_stash from dsp_tools.utils.connection import Connection @@ -18,9 +21,14 @@ # ruff: noqa: D102 (undocumented-public-method) +@dataclass class ProjectClientStub: """Stub class for ProjectClient.""" + con: Connection + shortcode: str + project_info: ProjectInfo | None + def get_project_iri(self) -> str: raise NotImplementedError("get_project_iri not implemented") @@ -87,13 +95,9 @@ def test_upload_link_value_stash(self) -> None: } ) con: Connection = ConnectionMock(post_responses=[{}]) - nonapplied = _upload_stash( - stash=stash, - iri_resolver=iri_resolver, - con=con, - project_client=ProjectClientStub(), - ) - assert nonapplied is None + upload_state = UploadState([], [], iri_resolver, stash, UploadConfig(), {}) + _upload_stash(upload_state, ProjectClientStub(con, "1234", None)) + assert not 
upload_state.pending_stash or upload_state.pending_stash.is_empty() class TestUploadTextValueStashes: @@ -136,13 +140,9 @@ def test_upload_text_value_stash(self) -> None: ], put_responses=[{}], ) - nonapplied = _upload_stash( - stash=stash, - iri_resolver=iri_resolver, - con=con, - project_client=ProjectClientStub(), - ) - assert nonapplied is None + upload_state = UploadState([], [], iri_resolver, stash, UploadConfig(), {}) + _upload_stash(upload_state, ProjectClientStub(con, "1234", None)) + assert not upload_state.pending_stash or upload_state.pending_stash.is_empty() def test_not_upload_text_value_stash_if_uuid_not_on_value(self) -> None: """ @@ -182,10 +182,6 @@ def test_not_upload_text_value_stash_if_uuid_not_on_value(self) -> None: ], put_responses=[{}], ) - nonapplied = _upload_stash( - stash=stash, - iri_resolver=iri_resolver, - con=con, - project_client=ProjectClientStub(), - ) - assert nonapplied == stash + upload_state = UploadState([], [], iri_resolver, stash, UploadConfig(), {}) + _upload_stash(upload_state, ProjectClientStub(con, "1234", None)) + assert upload_state.pending_stash == stash diff --git a/test/integration/commands/xmlupload/test_resource_creation.py b/test/integration/commands/xmlupload/test_resource_creation.py new file mode 100644 index 000000000..4d0cddd7d --- /dev/null +++ b/test/integration/commands/xmlupload/test_resource_creation.py @@ -0,0 +1,413 @@ +from copy import deepcopy +from dataclasses import dataclass +from unittest.mock import Mock + +import pytest +from lxml import etree + +from dsp_tools.commands.xmlupload import xmlupload +from dsp_tools.commands.xmlupload.iri_resolver import IriResolver +from dsp_tools.commands.xmlupload.models.sipi import Sipi +from dsp_tools.commands.xmlupload.models.upload_state import UploadState +from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource +from dsp_tools.commands.xmlupload.project_client import ProjectInfo +from dsp_tools.commands.xmlupload.stash.stash_models import 
LinkValueStash +from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem +from dsp_tools.commands.xmlupload.stash.stash_models import Stash +from dsp_tools.commands.xmlupload.upload_config import UploadConfig +from dsp_tools.models.exceptions import PermanentTimeOutError +from dsp_tools.models.exceptions import XmlUploadInterruptedError +from dsp_tools.utils.connection import Connection +from dsp_tools.utils.connection_live import ConnectionLive + + +class ListClientMock: + def get_list_node_id_to_iri_lookup(self) -> dict[str, str]: + return {} + + +@dataclass +class ProjectClientStub: + """Stub class for ProjectClient.""" + + con: Connection + shortcode: str + project_info: ProjectInfo | None + + def get_project_iri(self) -> str: + return "https://admin.test.dasch.swiss/project/MsOaiQkcQ7-QPxsYBKckfQ" + + def get_ontology_iris(self) -> list[str]: + raise NotImplementedError("get_project_iri not implemented") + + def get_ontology_name_dict(self) -> dict[str, str]: + return {} + + def get_ontology_iri_dict(self) -> dict[str, str]: + raise NotImplementedError("get_project_iri not implemented") + + +def test_one_resource_without_links() -> None: + xml_strings = [ + """ + + foo_1 text + + """, + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + upload_state = UploadState(xml_resources, [], IriResolver(), None, UploadConfig(), {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [{"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + + assert len(con.post.call_args_list) == len(post_responses) + match con.post.call_args_list[0].kwargs: + case { + "route": "/v2/resources", + "data": { + "@type": "my_onto:foo_1_type", + "rdfs:label": "foo_1_label", + "knora-api:attachedToProject": {"@id": 
"https://admin.test.dasch.swiss/project/MsOaiQkcQ7-QPxsYBKckfQ"}, + "@context": dict(), + "my_onto:hasSimpleText": [{"@type": "knora-api:TextValue", "knora-api:valueAsString": "foo_1 text"}], + }, + }: + assert True + case _: + pytest.fail("POST request was not sent correctly") + assert not upload_state.pending_resources + assert not upload_state.failed_uploads + assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri"} + assert not upload_state.pending_stash + + +def test_one_resource_with_link_to_existing_resource() -> None: + xml_strings = [ + """ + + foo_2_id + + """, + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + upload_state = UploadState(xml_resources, [], IriResolver({"foo_2_id": "foo_2_iri"}), None, UploadConfig(), {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [{"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + + assert len(con.post.call_args_list) == len(post_responses) + match con.post.call_args_list[0].kwargs: + case { + "route": "/v2/resources", + "data": { + "@type": "my_onto:foo_1_type", + "rdfs:label": "foo_1_label", + "knora-api:attachedToProject": {"@id": "https://admin.test.dasch.swiss/project/MsOaiQkcQ7-QPxsYBKckfQ"}, + "@context": dict(), + "my_onto:hasCustomLinkValue": [ + {"@type": "knora-api:LinkValue", "knora-api:linkValueHasTargetIri": {"@id": "foo_2_iri"}} + ], + }, + }: + assert True + case _: + pytest.fail("POST request was not sent correctly") + assert not upload_state.pending_resources + assert not upload_state.failed_uploads + assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"} + assert not upload_state.pending_stash + + +def test_2_resources_with_stash() -> None: + xml_strings = [ + '', + '', + ] + xml_resources = 
[XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + link_val_stash_dict = { + "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")], + "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")], + } + stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None) + upload_state = UploadState(xml_resources, [], IriResolver(), deepcopy(stash), UploadConfig(), {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [ + {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}, + {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"}, + {}, # uploading a stash doesn't rely on a certain response + {}, # uploading a stash doesn't rely on a certain response + ] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + + assert len(con.post.call_args_list) == len(post_responses) + match con.post.call_args_list[2].kwargs: + case { + "route": "/v2/values", + "data": { + "@type": "my_onto:foo_1_type", + "@id": "foo_1_iri", + "@context": dict(), + "my_onto:hasCustomLinkValue": { + "@type": "knora-api:LinkValue", + "knora-api:linkValueHasTargetIri": {"@id": "foo_2_iri"}, + }, + }, + }: + assert True + case _: + pytest.fail("POST request was not sent correctly") + assert not upload_state.pending_resources + assert not upload_state.failed_uploads + assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"} + assert not upload_state.pending_stash or upload_state.pending_stash.is_empty() + + +def test_2_resources_with_stash_interrupted_by_timeout() -> None: + _2_resources_with_stash_interrupted_by_error(PermanentTimeOutError(""), "PermanentTimeOutError") + + +def test_2_resources_with_stash_interrupted_by_keyboard() -> None: + 
_2_resources_with_stash_interrupted_by_error(KeyboardInterrupt(), "KeyboardInterrupt") + + +def _2_resources_with_stash_interrupted_by_error(err_to_interrupt_with: BaseException, err_as_str: str) -> None: + xml_strings = [ + '', + '', + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + link_val_stash_dict = { + "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")], + "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")], + } + stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None) + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), UploadConfig(), {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [ + {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}, + err_to_interrupt_with, + ] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + xmlupload._handle_upload_error = Mock() + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + + assert len(con.post.call_args_list) == len(post_responses) + err_msg = ( + f"There was a {err_as_str} while trying to create resource 'foo_2_id'.\n" + "It is unclear if the resource 'foo_2_id' was created successfully or not.\n" + "Please check manually in the DSP-APP or DB.\n" + "In case of successful creation, call 'resume-xmlupload' with the flag " + "'--skip-first-resource' to prevent duplication.\n" + "If not, a normal 'resume-xmlupload' can be started." 
+ ) + upload_state_expected = UploadState( + xml_resources[1:], [], IriResolver({"foo_1_id": "foo_1_iri"}), stash, UploadConfig(), {} + ) + xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected) + + +def test_5_resources_with_stash_and_interrupt_after_2() -> None: + xml_strings = [ + '', + '', + '', + '', + '', + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + link_val_stash_dict = { + "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")], + "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")], + } + stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None) + upload_config = UploadConfig(interrupt_after=2) + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [ + {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}, + {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"}, + {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"}, + {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"}, + {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"}, + {}, # uploading a stash doesn't rely on a certain response + {}, # uploading a stash doesn't rely on a certain response + ] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + xmlupload._handle_upload_error = Mock() + err_msg = "Interrupted: Maximum number of resources was reached (2)" + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + iri_resolver_expected = IriResolver({"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"}) + upload_state_expected = UploadState(xml_resources[2:], [], iri_resolver_expected, stash, upload_config, {}) + 
xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected) + + xmlupload._handle_upload_error = Mock() + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + iri_resolver_expected.lookup.update({"foo_3_id": "foo_3_iri", "foo_4_id": "foo_4_iri"}) + upload_state_expected = UploadState(xml_resources[4:], [], iri_resolver_expected, stash, upload_config, {}) + xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected) + + xmlupload._handle_upload_error = Mock() + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + iri_resolver_expected.lookup.update({"foo_5_id": "foo_5_iri"}) + empty_stash = Stash(standoff_stash=None, link_value_stash=LinkValueStash({})) + upload_state_expected = UploadState([], [], iri_resolver_expected, empty_stash, upload_config, {}) + xmlupload._handle_upload_error.assert_not_called() + assert upload_state == upload_state_expected + + +def test_6_resources_with_stash_and_interrupt_after_2() -> None: + xml_strings = [ + '', + '', + '', + '', + '', + '', + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + link_val_stash_dict = { + "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")], + "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")], + } + stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None) + upload_config = UploadConfig(interrupt_after=2) + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [ + {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}, + {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"}, + {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"}, + {"@id": 
"foo_4_iri", "rdfs:label": "foo_4_label"}, + {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"}, + {"@id": "foo_6_iri", "rdfs:label": "foo_6_label"}, + {}, # uploading a stash doesn't rely on a certain response + {}, # uploading a stash doesn't rely on a certain response + ] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + xmlupload._handle_upload_error = Mock() + err_msg = "Interrupted: Maximum number of resources was reached (2)" + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + iri_resolver_expected = IriResolver({"foo_1_id": "foo_1_iri", "foo_2_id": "foo_2_iri"}) + upload_state_expected = UploadState(xml_resources[2:], [], iri_resolver_expected, stash, upload_config, {}) + xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected) + + xmlupload._handle_upload_error = Mock() + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + iri_resolver_expected.lookup.update({"foo_3_id": "foo_3_iri", "foo_4_id": "foo_4_iri"}) + upload_state_expected = UploadState(xml_resources[4:], [], iri_resolver_expected, stash, upload_config, {}) + xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected) + + xmlupload._handle_upload_error = Mock() + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + iri_resolver_expected.lookup.update({"foo_5_id": "foo_5_iri", "foo_6_id": "foo_6_iri"}) + upload_state_expected = UploadState([], [], iri_resolver_expected, stash, upload_config, {}) + xmlupload._handle_upload_error.assert_called_once_with(XmlUploadInterruptedError(err_msg), upload_state_expected) + + xmlupload._handle_upload_error = Mock() + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + empty_stash = Stash(standoff_stash=None, 
link_value_stash=LinkValueStash({})) + upload_state_expected = UploadState([], [], iri_resolver_expected, empty_stash, upload_config, {}) + xmlupload._handle_upload_error.assert_not_called() + assert upload_state == upload_state_expected + + +def test_logging(caplog: pytest.LogCaptureFixture) -> None: + xml_strings = [ + '', + '', + '', + '', + '', + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + link_val_stash_dict = { + "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")], + "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")], + } + stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None) + upload_config = UploadConfig(interrupt_after=2) + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [ + {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}, + {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"}, + {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"}, + {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"}, + {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"}, + {}, # uploading a stash doesn't rely on a certain response + {}, # uploading a stash doesn't rely on a certain response + ] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + xmlupload._handle_upload_error = Mock() + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + assert caplog.records[1].message == "Created resource 1/5: 'foo_1_label' (ID: 'foo_1_id', IRI: 'foo_1_iri')" + assert caplog.records[3].message == "Created resource 2/5: 'foo_2_label' (ID: 'foo_2_id', IRI: 'foo_2_iri')" + caplog.clear() + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + assert 
caplog.records[1].message == "Created resource 3/5: 'foo_3_label' (ID: 'foo_3_id', IRI: 'foo_3_iri')" + assert caplog.records[3].message == "Created resource 4/5: 'foo_4_label' (ID: 'foo_4_id', IRI: 'foo_4_iri')" + caplog.clear() + + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + assert caplog.records[1].message == "Created resource 5/5: 'foo_5_label' (ID: 'foo_5_id', IRI: 'foo_5_iri')" + assert caplog.records[3].message == " Upload resptrs of resource 'foo_1_id'..." + assert caplog.records[5].message == " Upload resptrs of resource 'foo_2_id'..." + caplog.clear() + + +def test_post_requests() -> None: + xml_strings = [ + '', + '', + '', + '', + '', + '', + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + link_val_stash_dict = { + "foo_1_id": [LinkValueStashItem("foo_1_id", "my_onto:foo_1_type", "my_onto:hasCustomLink", "foo_2_id")], + "foo_2_id": [LinkValueStashItem("foo_2_id", "my_onto:foo_2_type", "my_onto:hasCustomLink", "foo_1_id")], + } + stash = Stash(link_value_stash=LinkValueStash(link_val_stash_dict), standoff_stash=None) + upload_config = UploadConfig(interrupt_after=2) + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), deepcopy(stash), upload_config, {}) + con = Mock(spec_set=ConnectionLive) + post_responses = [ + {"@id": "foo_1_iri", "rdfs:label": "foo_1_label"}, + {"@id": "foo_2_iri", "rdfs:label": "foo_2_label"}, + {"@id": "foo_3_iri", "rdfs:label": "foo_3_label"}, + {"@id": "foo_4_iri", "rdfs:label": "foo_4_label"}, + {"@id": "foo_5_iri", "rdfs:label": "foo_5_label"}, + {"@id": "foo_6_iri", "rdfs:label": "foo_6_label"}, + {}, # uploading a stash doesn't rely on a certain response + {}, # uploading a stash doesn't rely on a certain response + ] + con.post = Mock(side_effect=post_responses) + project_client = ProjectClientStub(con, "1234", None) + xmlupload._handle_upload_error = Mock() + + xmlupload.upload_resources(upload_state, ".", 
Sipi(con), project_client, ListClientMock()) + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + xmlupload.upload_resources(upload_state, ".", Sipi(con), project_client, ListClientMock()) + assert len(con.post.call_args_list) == len(post_responses) diff --git a/test/integration/commands/xmlupload/test_upload_state.py b/test/integration/commands/xmlupload/test_upload_state.py index 50af8df50..a9d4055c0 100644 --- a/test/integration/commands/xmlupload/test_upload_state.py +++ b/test/integration/commands/xmlupload/test_upload_state.py @@ -3,6 +3,7 @@ from lxml import etree +from dsp_tools.commands.xmlupload.iri_resolver import IriResolver from dsp_tools.commands.xmlupload.models.upload_state import UploadState from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource from dsp_tools.commands.xmlupload.upload_config import DiagnosticsConfig @@ -23,7 +24,7 @@ def test_save_upload_state(tmp_path: Path) -> None: upload_state = UploadState( pending_resources=[XMLResource(etree.fromstring(resource_str), default_ontology="test")], failed_uploads=[], - iri_resolver_lookup={"foo": "bar"}, + iri_resolver=IriResolver({"foo": "bar"}), pending_stash=None, config=config, permissions_lookup={}, @@ -34,7 +35,7 @@ def test_save_upload_state(tmp_path: Path) -> None: assert msg == f"Saved the current upload state to {save_location}.\n" assert len(upload_state.pending_resources) == len(saved_state.pending_resources) assert [r.res_id for r in upload_state.pending_resources] == [r.res_id for r in saved_state.pending_resources] - assert upload_state.iri_resolver_lookup == saved_state.iri_resolver_lookup + assert upload_state.iri_resolver.lookup == saved_state.iri_resolver.lookup assert upload_state.pending_stash == saved_state.pending_stash assert upload_state.config == saved_state.config assert upload_state.permissions_lookup == 
saved_state.permissions_lookup diff --git a/test/unittests/commands/xmlupload/test_resource_creation.py b/test/unittests/commands/xmlupload/test_resource_creation.py new file mode 100644 index 000000000..a11b4d362 --- /dev/null +++ b/test/unittests/commands/xmlupload/test_resource_creation.py @@ -0,0 +1,53 @@ +from lxml import etree + +from dsp_tools.commands.xmlupload.iri_resolver import IriResolver +from dsp_tools.commands.xmlupload.models.upload_state import UploadState +from dsp_tools.commands.xmlupload.models.xmlresource import XMLResource +from dsp_tools.commands.xmlupload.upload_config import UploadConfig +from dsp_tools.commands.xmlupload.xmlupload import _tidy_up_resource_creation_idempotent + + +def test_idempotency_on_success() -> None: + xml_strings = [ + """ + + foo_1 text + + """, + """ + + foo_2 text + + """, + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), None, UploadConfig(), {}) + for _ in range(3): + _tidy_up_resource_creation_idempotent(upload_state, "foo_1_iri", xml_resources[0]) + assert upload_state.pending_resources == xml_resources[1:] + assert upload_state.failed_uploads == [] + assert upload_state.iri_resolver.lookup == {"foo_1_id": "foo_1_iri"} + assert not upload_state.pending_stash + + +def test_idempotency_on_failure() -> None: + xml_strings = [ + """ + + foo_1 text + + """, + """ + + foo_2 text + + """, + ] + xml_resources = [XMLResource(etree.fromstring(xml_str), "my_onto") for xml_str in xml_strings] + upload_state = UploadState(xml_resources.copy(), [], IriResolver(), None, UploadConfig(), {}) + for _ in range(3): + _tidy_up_resource_creation_idempotent(upload_state, None, xml_resources[0]) + assert upload_state.pending_resources == xml_resources[1:] + assert upload_state.failed_uploads == ["foo_1_id"] + assert upload_state.iri_resolver.lookup == {} + assert not upload_state.pending_stash