diff --git a/docs/dsp-tools-create.md b/docs/dsp-tools-create.md index d743ce695..4cfc724f6 100644 --- a/docs/dsp-tools-create.md +++ b/docs/dsp-tools-create.md @@ -317,12 +317,8 @@ Example of a list: #### Lists from Excel -A list can be directly imported from one or several Excel files. The Excel sheet must have the following format: - -![img-list-example.png](assets/images/img-list-example.png) - -If there are several languages, a separate Excel file for each language has to be provided. The folder with the Excel -file(s) can be directly referenced inside the list definition by defining it as new list node: +A list can be directly imported from one or several Excel files. The folder with the Excel file(s) can then be +referenced directly inside the list definition by defining it as a new list node: ```json { @@ -341,12 +337,12 @@ file(s) can be directly referenced inside the list definition by defining it as } ``` -The nodes section must contain the field: +The `nodes` section has to contain the field: -- _folder_: Path to the folder where the Excel files are stored +- _folder_: Path to the folder containing the Excel files -Further details to this functionality can be found -[here](dsp-tools-excel#create-a-list-from-one-or-several-excel-files). +Further information about the expected format of the Excel lists and further details about this functionality can be found +[here](./dsp-tools-excel.md#create-a-list-from-one-or-several-excel-files). The `lists` element is optional. If not used, it should be omitted. diff --git a/docs/dsp-tools-excel.md b/docs/dsp-tools-excel.md index 8cef370b9..22c564362 100644 --- a/docs/dsp-tools-excel.md +++ b/docs/dsp-tools-excel.md @@ -76,6 +76,7 @@ For further information about properties, see [here](./dsp-tools-create-ontologi ## Create a list from one or several Excel files + With dsp-tools a JSON list can be created from one or several Excel files. The list can then be inserted into a JSON ontology and uploaded to a DSP server. 
The expected structure of the Excel files is described [here](./dsp-tools-create.md#lists-from-excel). It is possible to create multilingual lists. In this case, a separate diff --git a/docs/dsp-tools-xmlupload.md b/docs/dsp-tools-xmlupload.md index 22d2dbf19..c22df53dd 100644 --- a/docs/dsp-tools-xmlupload.md +++ b/docs/dsp-tools-xmlupload.md @@ -179,6 +179,9 @@ A `<resource>` element contains all necessary information to create a resource. - `id`: a unique, arbitrary string providing a unique ID to the resource in order to be referencable by other resources; the ID is only used during the import process and later replaced by the IRI used internally by DSP (required) - `permissions`: a reference to a permission set; the permissions will be applied to the created resource (optional) +- `iri`: a custom IRI used when migrating existing resources (optional) +- `ark`: a version 0 ARK used when migrating existing resources from salsah.org to DSP (optional). `iri` and `ark` +should not be combined in the same resource: if both are given, `ark` takes precedence and `iri` is ignored. 
A complete `` element may look as follows: diff --git a/knora/dsplib/models/resource.py b/knora/dsplib/models/resource.py index 8248cf146..671cbcb57 100644 --- a/knora/dsplib/models/resource.py +++ b/knora/dsplib/models/resource.py @@ -217,6 +217,9 @@ def fromJsonLdObj(self, con: Connection, jsonld_obj: Any) -> 'ResourceInstance': def toJsonLdObj(self, action: Actions) -> Any: tmp = {} if action == Actions.Create: + # if a custom IRI is provided, use it + if self._iri: + tmp['@id'] = self._iri tmp['@type'] = self.classname tmp["knora-api:attachedToProject"] = { "@id": self.project @@ -335,7 +338,7 @@ def __init__(self, con: Connection, projident: str): self._con = con - if re.match("^[0-9aAbBcCdDeEfF]{4}$", projident): + if re.match("^[0-9a-fA-F]{4}$", projident): project = Project(con=self._con, shortcode=projident) elif re.match("^[\\w-]+$", projident): project = Project(con=self._con, shortname=projident) diff --git a/knora/dsplib/schemas/data.xsd b/knora/dsplib/schemas/data.xsd index 5a40896c3..c854a4b2a 100644 --- a/knora/dsplib/schemas/data.xsd +++ b/knora/dsplib/schemas/data.xsd @@ -409,7 +409,9 @@ + + diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py index 60261b6a0..92f72592d 100644 --- a/knora/dsplib/utils/xml_upload.py +++ b/knora/dsplib/utils/xml_upload.py @@ -1,8 +1,11 @@ """ This module handles the import of XML data into the DSP platform. 
""" +import base64 import json import os +import re +import uuid from datetime import datetime from pathlib import Path from typing import Optional, Union @@ -229,6 +232,8 @@ class XMLResource: """Represents a resource in the XML used for data import""" _id: str + _iri: Optional[str] + _ark: Optional[str] _label: str _restype: str _permissions: Optional[str] @@ -242,8 +247,13 @@ def __init__(self, node: etree.Element, default_ontology: Optional[str] = None) Args: node: The DOM node to be processed representing a resource (which is a child of the knora element) default_ontology: The default ontology (given in the attribute default-ontology of the knora element) + + Returns: + None """ self._id = node.attrib['id'] + self._iri = node.attrib.get('iri') + self._ark = node.attrib.get('ark') self._label = node.attrib['label'] # get the resource type which is in format namespace:resourcetype, p.ex. rosetta:Image tmp_res_type = node.attrib['restype'].split(':') @@ -255,11 +265,7 @@ def __init__(self, node: etree.Element, default_ontology: Optional[str] = None) self._restype = default_ontology + ':' + tmp_res_type[1] else: self._restype = 'knora-admin:' + tmp_res_type[0] - permissions_tmp = node.attrib.get("permissions") - if permissions_tmp: - self._permissions = node.attrib['permissions'] - else: - self._permissions = None + self._permissions = node.attrib.get("permissions") self._bitstream = None self._properties = [] for subnode in node: @@ -277,6 +283,16 @@ def id(self) -> str: """The unique id of the resource""" return self._id + @property + def iri(self) -> Optional[str]: + """The custom IRI of the resource""" + return self._iri + + @property + def ark(self) -> Optional[str]: + """The custom ARK of the resource""" + return self._ark + @property def label(self) -> str: """The label of the resource""" @@ -288,7 +304,7 @@ def restype(self) -> str: return self._restype @property - def permissions(self) -> str: + def permissions(self) -> Optional[str]: """The reference to 
def convert_ark_v0_to_resource_iri(ark: str) -> str:
    """
    Converts an ARK URL from salsah.org (ARK version 0) of the form ark:/72163/080c-779b9990a0c3f-6e to a DSP resource
    IRI of the form http://rdfh.ch/080C/Ef9heHjPWDS7dMR_gGax2Q

    This method is needed for the migration of projects from salsah.org to DSP. Resources need to be created with an
    existing ARK, so the IRI needs to be extracted from that ARK in order for the ARK URL to be still valid after the
    migration.

    Args:
        ark: an ARK version 0 of the form ark:/72163/080c-779b9990a0c3f-6e, '72163' being the Name Assigning Authority
            number, '080c' being the project shortcode, '779b9990a0c3f' being an ID derived from the object's Salsah ID
            and '6e' being check digits

    Returns:
        Resource IRI (str) of the form http://rdfh.ch/080C/Ef9heHjPWDS7dMR_gGax2Q

    Raises:
        BaseError: if the ARK does not consist of exactly three hyphen-separated parts, if the first part contains no
            "/" in front of the project shortcode, if the shortcode is not a 4-digit hexadecimal number, or if the
            Salsah ID contains non-alphanumeric characters
    """
    invalid_ark_msg = f"while converting ARK '{ark}'. The ARK seems to be invalid"

    # an ARK v0 consists of exactly three hyphen-separated parts: <prefix>/<shortcode>-<salsah id>-<check digits>
    if ark.count("-") != 2:
        raise BaseError(invalid_ark_msg)
    prefix, resource_id, _ = ark.split("-")

    # the shortcode is whatever follows the last slash of the first part; rpartition() never fails to unpack, so a
    # missing slash is reported as an invalid ARK instead of surfacing as a bare ValueError
    _, separator, project_id = prefix.rpartition("/")
    if not separator:
        raise BaseError(invalid_ark_msg)
    project_id = project_id.upper()

    # fullmatch() (unlike match() with a "$" anchor) also rejects a trailing newline
    if not re.fullmatch("[0-9a-fA-F]{4}", project_id):
        raise BaseError(f"while converting ARK '{ark}'. Invalid project shortcode '{project_id}'")
    if not re.fullmatch("[0-9A-Za-z]+", resource_id):
        raise BaseError(f"while converting ARK '{ark}'. Invalid Salsah ID '{resource_id}'")

    # create the DaSCH namespace to create version 5 UUIDs
    dasch_uuid_ns = uuid.uuid5(uuid.NAMESPACE_URL, "https://dasch.swiss")  # cace8b00-717e-50d5-bcb9-486f39d733a2

    # make a UUID v5 from the DaSCH namespace (which is a UUID itself) and the Salsah resource ID, then encode it as
    # URL-safe base64; the 16 UUID bytes always produce a trailing "==" padding, which is not part of the IRI
    resource_uuid = uuid.uuid5(dasch_uuid_ns, resource_id)
    dsp_uuid = base64.urlsafe_b64encode(resource_uuid.bytes).decode("utf-8").rstrip("=")

    # use the new UUID to create the resource IRI
    return f"http://rdfh.ch/{project_id}/{dsp_uuid}"
"""Unit tests for ARK v0 conversion"""

import unittest

from knora.dsplib.models.helpers import BaseError
from knora.dsplib.utils.xml_upload import convert_ark_v0_to_resource_iri


class TestARKV02IRI(unittest.TestCase):
    """Tests the conversion of salsah.org ARKs (version 0) to DSP resource IRIs."""

    def _assert_conversion_fails(self, ark: str, expected_message: str) -> None:
        """Assert that converting `ark` raises a BaseError carrying exactly `expected_message`."""
        with self.assertRaises(BaseError) as ctx:
            convert_ark_v0_to_resource_iri(ark)
        # checked after the with-block: statements placed after the raising call inside it would never run
        self.assertEqual(ctx.exception.message, expected_message)

    def test_convert_ark_v0_to_resource_iri(self):
        """A well-formed ARK is converted to the expected resource IRI."""
        ark = "ark:/72163/080c-779b9990a0c3f-6e"
        iri = convert_ark_v0_to_resource_iri(ark)
        self.assertEqual("http://rdfh.ch/080C/Ef9heHjPWDS7dMR_gGax2Q", iri)

    def test_wrong_number_of_hyphens(self):
        """An ARK with more than two hyphens is rejected as invalid."""
        self._assert_conversion_fails(
            "ark:/72163/080c-779b999-0a0c3f-6e",
            "while converting ARK 'ark:/72163/080c-779b999-0a0c3f-6e'. The ARK seems to be invalid"
        )

    def test_non_hexadecimal_shortcode(self):
        """A project shortcode containing a non-hexadecimal character is rejected."""
        self._assert_conversion_fails(
            "ark:/72163/080X-779b9990a0c3f-6e",
            "while converting ARK 'ark:/72163/080X-779b9990a0c3f-6e'. Invalid project shortcode '080X'"
        )

    def test_shortcode_with_wrong_length(self):
        """A project shortcode that is not exactly four characters long is rejected."""
        self._assert_conversion_fails(
            "ark:/72163/080c1-779b9990a0c3f-6e",
            "while converting ARK 'ark:/72163/080c1-779b9990a0c3f-6e'. Invalid project shortcode '080C1'"
        )

    def test_invalid_salsah_id(self):
        """A Salsah ID containing a non-alphanumeric character is rejected."""
        self._assert_conversion_fails(
            "ark:/72163/080c-779b99+90a0c3f-6e",
            "while converting ARK 'ark:/72163/080c-779b99+90a0c3f-6e'. Invalid Salsah ID '779b99+90a0c3f'"
        )


if __name__ == '__main__':
    unittest.main()