diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py index 6f0f9034b..0a63adde3 100644 --- a/knora/dsp_tools.py +++ b/knora/dsp_tools.py @@ -48,30 +48,29 @@ def program(user_args: list[str]) -> None: subparsers = parser.add_subparsers(title='Subcommands', description='Valid subcommands are', help='sub-command help') # create - parser_create = subparsers.add_parser('create', help='Upload an ontology and/or list(s) from a JSON file to the ' - 'DaSCH Service Platform') + parser_create = subparsers.add_parser('create', help='Upload a project and/or list(s) from a JSON project file to ' + 'the DaSCH Service Platform') parser_create.set_defaults(action='create') parser_create.add_argument('-s', '--server', type=str, default=default_localhost, help=url_text) parser_create.add_argument('-u', '--user', default=default_user, help=username_text) parser_create.add_argument('-p', '--password', default=default_pw, help=password_text) - parser_create.add_argument('-V', '--validate-only', action='store_true', help='Do only validation of JSON, no ' - 'upload of the ontology') + parser_create.add_argument('-V', '--validate-only', action='store_true', help='Only validate the project against ' + 'the JSON schema, without uploading it') parser_create.add_argument('-l', '--lists-only', action='store_true', help='Upload only the list(s)') parser_create.add_argument('-v', '--verbose', action='store_true', help=verbose_text) parser_create.add_argument('-d', '--dump', action='store_true', help='dump test files for DSP-API requests') - parser_create.add_argument('datamodelfile', help='path to data model file') + parser_create.add_argument('projectfile', help='path to a JSON project file') # get - parser_get = subparsers.add_parser('get', help='Get the ontology (data model) of a project from the DaSCH Service ' - 'Platform.') + parser_get = subparsers.add_parser('get', help='Get a project from the DaSCH Service Platform.') parser_get.set_defaults(action='get') parser_get.add_argument('-u', 
'--user', default=default_user, help=username_text) parser_get.add_argument('-p', '--password', default=default_pw, help=password_text) parser_get.add_argument('-s', '--server', type=str, default=default_localhost, help=url_text) parser_get.add_argument('-P', '--project', type=str, help='Shortcode, shortname or iri of project', required=True) parser_get.add_argument('-v', '--verbose', action='store_true', help=verbose_text) - parser_get.add_argument('datamodelfile', help='Path to the file the ontology should be written to', - default='onto.json') + parser_get.add_argument('projectfile', help='Path to the file the project should be written to', + default='project.json') # xmlupload parser_upload = subparsers.add_parser('xmlupload', help='Upload data from an XML file to the DaSCH Service Platform.') @@ -101,7 +100,7 @@ def program(user_args: list[str]) -> None: # excel2resources parser_excel_resources = subparsers.add_parser('excel2resources', help='Create a JSON file from an Excel file ' - 'containing resources for a DSP ontology. ') + 'containing resources for a DSP ontology. 
') parser_excel_resources.set_defaults(action='excel2resources') parser_excel_resources.add_argument('excelfile', help='Path to the Excel file containing the resources', default='resources.xlsx') @@ -147,19 +146,22 @@ def program(user_args: list[str]) -> None: if args.action == 'create': if args.lists_only: if args.validate_only: - validate_lists_section_with_schema(path_to_json_project_file=args.datamodelfile) + validate_lists_section_with_schema(path_to_json_project_file=args.projectfile) + print('"Lists" section of the JSON project file is syntactically correct and passed validation.') + exit(0) else: - create_lists(input_file=args.datamodelfile, + create_lists(input_file=args.projectfile, server=args.server, user=args.user, password=args.password, dump=args.dump) else: - if args.validate_only and validate_project(args.datamodelfile): - print('Data model is syntactically correct and passed validation.') + if args.validate_only: + validate_project(args.projectfile) + print('JSON project file is syntactically correct and passed validation.') exit(0) else: - create_project(input_file=args.datamodelfile, + create_project(input_file=args.projectfile, server=args.server, user_mail=args.user, password=args.password, @@ -167,15 +169,14 @@ def program(user_args: list[str]) -> None: dump=args.dump if args.dump else False) elif args.action == 'get': get_ontology(project_identifier=args.project, - outfile=args.datamodelfile, + outfile=args.projectfile, server=args.server, user=args.user, password=args.password, verbose=args.verbose) elif args.action == 'xmlupload': if args.validate: - validate_xml_against_schema(input_file=args.xmlfile, - schema_file="knora/dsplib/schemas/data.xsd") + validate_xml_against_schema(input_file=args.xmlfile) else: xml_upload(input_file=args.xmlfile, server=args.server, @@ -187,13 +188,13 @@ def program(user_args: list[str]) -> None: incremental=args.incremental) elif args.action == 'excel2lists': excel2lists(excelfolder=args.excelfolder, - 
outfile=args.outfile) + path_to_output_file=args.outfile) elif args.action == 'excel2resources': excel2resources(excelfile=args.excelfile, - outfile=args.outfile) + path_to_output_file=args.outfile) elif args.action == 'excel2properties': excel2properties(excelfile=args.excelfile, - outfile=args.outfile) + path_to_output_file=args.outfile) elif args.action == 'id2iri': id_to_iri(xml_file=args.xmlfile, json_file=args.jsonfile, diff --git a/knora/dsplib/utils/onto_create_lists.py b/knora/dsplib/utils/onto_create_lists.py index faff0fe71..565c91e33 100644 --- a/knora/dsplib/utils/onto_create_lists.py +++ b/knora/dsplib/utils/onto_create_lists.py @@ -71,10 +71,15 @@ def create_lists( dump: bool = False ) -> Tuple[dict[str, Any], bool]: """ - This method uploads the "lists" section of a JSON project definition file to a DSP server. If the JSON project file - is still unparsed, this method parses it, expands the Excel sheets that are referenced, and validates it. + This method uploads the "lists" section of a JSON project definition file to a DSP server. The project must already + exist on the DSP server. + + If the JSON project file is passed as "input_file", this method parses it, expands the Excel sheets that are + referenced, and validates it. If it is passed as "project_definition", these preliminary steps are not necessary. + The "lists" section of the parsed project definition is then uploaded to the DSP server. If a list with the same name is already existing in this project on the DSP server, this list is skipped. + Returns a tuple consisting of a dict and a bool. The dict contains the IRIs of the created list nodes. If there are no lists in the project definition, an empty dictionary is returned. The bool indicates if everything went smoothly during the process. If a warning or error occurred (e.g. 
one of the lists already exists, or one of the nodes could diff --git a/knora/dsplib/utils/onto_validate.py b/knora/dsplib/utils/onto_validate.py index ee5fc0836..c036f8b63 100644 --- a/knora/dsplib/utils/onto_validate.py +++ b/knora/dsplib/utils/onto_validate.py @@ -1,5 +1,5 @@ import os -import re +import regex from typing import Any, Union import jsonschema import json @@ -10,7 +10,7 @@ def validate_project( - input_file_or_json: Union[dict[str, Any], os.PathLike[Any]], + input_file_or_json: Union[dict[str, Any], str], expand_lists: bool = True ) -> bool: """ @@ -28,32 +28,31 @@ def validate_project( True if the project passed validation. Otherwise, a BaseError with a detailed error report is raised. """ - if isinstance(input_file_or_json, dict): + if isinstance(input_file_or_json, dict) and "project" in input_file_or_json: project_definition = input_file_or_json - elif os.path.isfile(input_file_or_json): + elif isinstance(input_file_or_json, str) and os.path.isfile(input_file_or_json) and regex.search(r"\.json$", input_file_or_json): with open(input_file_or_json) as f: - project_json_str = f.read() - project_definition = json.loads(project_json_str) + project_definition = json.load(f) else: raise BaseError(f"Input '{input_file_or_json}' is neither a file path nor a JSON object.") if expand_lists: # expand all lists referenced in the "lists" section of the project definition, and add them to the project # definition - new_lists, _ = expand_lists_from_excel(project_definition["project"].get("lists")) + new_lists, _ = expand_lists_from_excel(project_definition["project"].get("lists", [])) if new_lists: - project_definition['project']['lists'] = new_lists + project_definition["project"]["lists"] = new_lists # validate the project definition against the schema current_dir = os.path.dirname(os.path.realpath(__file__)) - with open(os.path.join(current_dir, '../schemas/ontology.json')) as s: + with open(os.path.join(current_dir, "../schemas/ontology.json")) as s: 
schema = json.load(s) try: jsonschema.validate(instance=project_definition, schema=schema) except jsonschema.exceptions.ValidationError as err: - raise BaseError(f'The JSON project file cannot be created due to the following validation error: {err.message}.\n' - f'The error occurred at {err.json_path}:\n' - f'{err.instance}') + raise BaseError(f"The JSON project file cannot be created due to the following validation error: {err.message}.\n" + f"The error occurred at {err.json_path}:\n" + f"{err.instance}") # cardinalities check for circular references if _check_cardinalities_of_circular_references(project_definition): @@ -81,24 +80,24 @@ def _check_cardinalities_of_circular_references(project_definition: dict[Any, An return True else: error_message = \ - 'ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references ' \ - 'between resources. This is not a problem in itself, but if you try to upload data that actually ' \ - 'contains circular references, these "hasLinkTo" properties will be temporarily removed from the ' \ - 'affected resources. Therefore, it is necessary that all involved "hasLinkTo" properties have a ' \ - 'cardinality of 0-1 or 0-n. \n' \ - 'Please make sure that the following properties have a cardinality of 0-1 or 0-n:' + "ERROR: Your ontology contains properties derived from 'hasLinkTo' that allow circular references " \ + "between resources. This is not a problem in itself, but if you try to upload data that actually " \ + "contains circular references, these 'hasLinkTo' properties will be temporarily removed from the " \ + "affected resources. Therefore, it is necessary that all involved 'hasLinkTo' properties have a " \ + "cardinality of 0-1 or 0-n. 
\n" \ + "Please make sure that the following properties have a cardinality of 0-1 or 0-n:" for error in errors: - error_message = error_message + f'\n\t- Resource {error[0]}, property {error[1]}' + error_message = f"{error_message}\n\t- Resource {error[0]}, property {error[1]}" raise BaseError(error_message) def _collect_link_properties(project_definition: dict[Any, Any]) -> dict[str, list[str]]: """ map the properties derived from hasLinkTo to the resource classes they point to, for example: - link_properties = {'rosetta:hasImage2D': ['rosetta:Image2D'], ...} + link_properties = {"rosetta:hasImage2D": ["rosetta:Image2D"], ...} """ - ontos = project_definition['project']['ontologies'] - hasLinkTo_props = {'hasLinkTo', 'isPartOf', 'isRegionOf', 'isAnnotationOf'} + ontos = project_definition["project"]["ontologies"] + hasLinkTo_props = {"hasLinkTo", "isPartOf", "isRegionOf", "isAnnotationOf"} link_properties: dict[str, list[str]] = dict() for index, onto in enumerate(ontos): hasLinkTo_matches = list() @@ -106,28 +105,28 @@ def _collect_link_properties(project_definition: dict[Any, Any]) -> dict[str, li for i in range(5): for hasLinkTo_prop in hasLinkTo_props: hasLinkTo_matches.extend(jsonpath_ng.ext.parse( - f'$.project.ontologies[{index}].properties[?super[*] == {hasLinkTo_prop}]' + f"$.project.ontologies[{index}].properties[?super[*] == {hasLinkTo_prop}]" ).find(project_definition)) # make the children from this iteration to the parents of the next iteration - hasLinkTo_props = {x.value['name'] for x in hasLinkTo_matches} + hasLinkTo_props = {x.value["name"] for x in hasLinkTo_matches} prop_obj_pair: dict[str, list[str]] = dict() for match in hasLinkTo_matches: - prop = onto['name'] + ':' + match.value['name'] - target = match.value['object'] - if target != 'Resource': + prop = onto["name"] + ":" + match.value["name"] + target = match.value["object"] + if target != "Resource": # make the target a fully qualified name (with the ontology's name prefixed) - target = 
re.sub(r'^:([^:]+)$', f'{onto["name"]}:\\1', target) + target = regex.sub(r"^:([^:]+)$", f"{onto['name']}:\\1", target) prop_obj_pair[prop] = [target] link_properties.update(prop_obj_pair) # in case the object of a property is "Resource", the link can point to any resource class all_res_names: list[str] = list() for index, onto in enumerate(ontos): - matches = jsonpath_ng.ext.parse(f'$.resources[*].name').find(onto) - tmp = [f'{onto["name"]}:{match.value}' for match in matches] + matches = jsonpath_ng.ext.parse(f"$.resources[*].name").find(onto) + tmp = [f"{onto['name']}:{match.value}" for match in matches] all_res_names.extend(tmp) for prop, targ in link_properties.items(): - if 'Resource' in targ: + if "Resource" in targ: link_properties[prop] = all_res_names return link_properties @@ -138,31 +137,31 @@ def _identify_problematic_cardinalities(project_definition: dict[Any, Any], link make an error list with all cardinalities that are part of a circle but have a cardinality of "1" or "1-n" """ # make 2 dicts of the following form: - # dependencies = {'rosetta:Text': {'rosetta:hasImage2D': ['rosetta:Image2D'], ...}} - # cardinalities = {'rosetta:Text': {'rosetta:hasImage2D': '0-1', ...}} + # dependencies = {"rosetta:Text": {"rosetta:hasImage2D": ["rosetta:Image2D"], ...}} + # cardinalities = {"rosetta:Text": {"rosetta:hasImage2D": "0-1", ...}} dependencies: dict[str, dict[str, list[str]]] = dict() cardinalities: dict[str, dict[str, str]] = dict() - for onto in project_definition['project']['ontologies']: - for resource in onto['resources']: - resname: str = onto['name'] + ':' + resource['name'] - for card in resource['cardinalities']: + for onto in project_definition["project"]["ontologies"]: + for resource in onto["resources"]: + resname: str = onto["name"] + ":" + resource["name"] + for card in resource["cardinalities"]: # make the cardinality a fully qualified name (with the ontology's name prefixed) - cardname = re.sub(r'^(:?)([^:]+)$', f'{onto["name"]}:\\2', 
card['propname']) + cardname = regex.sub(r"^(:?)([^:]+)$", f"{onto['name']}:\\2", card["propname"]) if cardname in link_properties: # Look out: if `targets` is created with `targets = link_properties[cardname]`, the ex- # pression `dependencies[resname][cardname] = targets` causes `dependencies[resname][cardname]` # to point to `link_properties[cardname]`. Due to that, the expression - # `dependencies[resname][cardname].extend(targets)` will modify 'link_properties'! + # `dependencies[resname][cardname].extend(targets)` will modify "link_properties"! # For this reason, `targets` must be created with `targets = list(link_properties[cardname])` targets = list(link_properties[cardname]) if resname not in dependencies: dependencies[resname] = dict() dependencies[resname][cardname] = targets cardinalities[resname] = dict() - cardinalities[resname][cardname] = card['cardinality'] + cardinalities[resname][cardname] = card["cardinality"] elif cardname not in dependencies[resname]: dependencies[resname][cardname] = targets - cardinalities[resname][cardname] = card['cardinality'] + cardinalities[resname][cardname] = card["cardinality"] else: dependencies[resname][cardname].extend(targets) @@ -182,7 +181,7 @@ def _identify_problematic_cardinalities(project_definition: dict[Any, Any], link for property, targets in dependencies[resource].items(): if target in targets: prop = property - if cardinalities[resource][prop] not in ['0-1', '0-n']: + if cardinalities[resource][prop] not in ["0-1", "0-n"]: errors.add((resource, prop)) return sorted(errors, key=lambda x: x[0]) diff --git a/knora/dsplib/utils/shared.py b/knora/dsplib/utils/shared.py index 6ade52982..a99fd86ce 100644 --- a/knora/dsplib/utils/shared.py +++ b/knora/dsplib/utils/shared.py @@ -2,6 +2,7 @@ import unicodedata import pandas as pd import regex +import os from lxml import etree from requests import RequestException from datetime import datetime @@ -87,17 +88,18 @@ def try_network_action( raise 
BaseError(failure_msg) -def validate_xml_against_schema(input_file: str, schema_file: str) -> bool: +def validate_xml_against_schema(input_file: str) -> bool: """ Validates an XML file against an XSD schema Args: input_file: the XML file to be validated - schema_file: the schema against which the XML file should be validated Returns: True if the XML file is valid. Otherwise, a BaseError with a detailed error log is raised """ + current_dir = os.path.dirname(os.path.realpath(__file__)) + schema_file = os.path.join(current_dir, "../schemas/data.xsd") xmlschema = etree.XMLSchema(etree.parse(schema_file)) doc = etree.parse(input_file) diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py index 65a0fa146..623b170c6 100644 --- a/knora/dsplib/utils/xml_upload.py +++ b/knora/dsplib/utils/xml_upload.py @@ -241,10 +241,8 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s """ # Validate the input XML file - current_dir = os.path.dirname(os.path.realpath(__file__)) - schema_file = os.path.join(current_dir, '../schemas/data.xsd') try: - validate_xml_against_schema(input_file, schema_file) + validate_xml_against_schema(input_file) except BaseError as err: print(f"=====================================\n" f"{err.message}") diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py index f20f4d991..5da6c68de 100644 --- a/test/e2e/test_tools.py +++ b/test/e2e/test_tools.py @@ -1,70 +1,148 @@ -"""This test class tests the basic functionalities of dsp-tools""" +""" +This test class tests the basic functionalities of dsp-tools, i.e. all commands that can be called from the command +line. The methods are tested in the order in which they appear in dsp_tools.py. This class only tests that the methods +can be called with the basic configuration that is available via CLI. More thorough testing of each method is done in +separate unit tests/e2e tests. 
+""" import json import unittest import os import datetime import re +import jsonpath_ng +import jsonpath_ng.ext +import copy -from knora.dsplib.utils import excel_to_json_lists +from knora.dsplib.utils.excel_to_json_lists import excel2lists, validate_lists_section_with_schema from knora.dsplib.utils.excel_to_json_properties import excel2properties from knora.dsplib.utils.excel_to_json_resources import excel2resources from knora.dsplib.utils.id_to_iri import id_to_iri from knora.dsplib.utils.onto_create_ontology import create_project +from knora.dsplib.utils.onto_validate import validate_project +from knora.dsplib.utils.onto_create_lists import create_lists from knora.dsplib.utils.onto_get import get_ontology from knora.dsplib.utils.xml_upload import xml_upload +from knora.dsplib.utils.shared import validate_xml_against_schema +from knora.excel2xml import excel2xml class TestTools(unittest.TestCase): - server = 'http://0.0.0.0:3333' - user = 'root@example.com' - password = 'test' - imgdir = '.' - sipi = 'http://0.0.0.0:1024' - test_project_systematic_file = 'testdata/test-project-systematic.json' - test_project_minimal_file = 'testdata/test-project-minimal.json' - test_data_systematic_file = 'testdata/test-data-systematic.xml' - test_data_minimal_file = 'testdata/test-data-minimal.xml' + server = "http://0.0.0.0:3333" + user = "root@example.com" + password = "test" + imgdir = "." 
+ sipi = "http://0.0.0.0:1024" + test_project_systematic_file = "testdata/test-project-systematic.json" + test_project_minimal_file = "testdata/test-project-minimal.json" + test_data_systematic_file = "testdata/test-data-systematic.xml" + test_data_minimal_file = "testdata/test-data-minimal.xml" @classmethod def setUpClass(cls) -> None: """Is executed before the methods of this class are run""" - os.makedirs('testdata/tmp', exist_ok=True) + os.makedirs("testdata/tmp", exist_ok=True) @classmethod def tearDownClass(cls) -> None: """Is executed after the methods of this class have all run through""" - for file in os.listdir('testdata/tmp'): - os.remove('testdata/tmp/' + file) - os.rmdir('testdata/tmp') - for file in [f for f in os.listdir('.') if re.search(r'id2iri_.+\.json', f)]: + for file in os.listdir("testdata/tmp"): + os.remove("testdata/tmp/" + file) + os.rmdir("testdata/tmp") + for file in [f for f in os.listdir(".") if re.search(r"id2iri_.+\.json", f)]: os.remove(file) + def test_validate_lists_section_with_schema(self) -> None: + self.assertTrue(validate_lists_section_with_schema(self.test_project_systematic_file)) - def test_get(self) -> None: + + def test_create_lists(self) -> None: + # the project must already exist, so let's create a project without lists + create_project( + input_file=self.test_project_minimal_file, + server=self.server, + user_mail=self.user, + password="test", + verbose=True, + dump=False + ) + + # open a "lists" section and the project that was created + with open("testdata/lists_multilingual_output_expected.json") as f: + lists_section = json.load(f) + with open(self.test_project_minimal_file) as f: + test_project_minimal = json.load(f) + + # create a copy of the project that was created, and insert the first list into it + test_project_minimal_with_list_1 = copy.deepcopy(test_project_minimal) + test_project_minimal_with_list_1["project"]["lists"] = [lists_section[0], ] + + # create another copy of the project that was created, 
insert the second list into it, and save it as file + test_project_minimal_with_list_2 = copy.deepcopy(test_project_minimal) + test_project_minimal_with_list_2["project"]["lists"] = [lists_section[1], ] + with open("testdata/tmp/test_project_minimal_with_list_2.json", "x") as f: + json.dump(test_project_minimal_with_list_2, f) + + # The method to be tested can now be called with both versions of the same project. One is loaded from disk, + # the other is a Python object. The two projects each contain another list. + name2iri_mapping1, success1 = create_lists(server=self.server, + user=self.user, + password=self.password, + project_definition=test_project_minimal_with_list_1) + name2iri_mapping2, success2 = create_lists(server=self.server, + user=self.user, + password=self.password, + input_file="testdata/tmp/test_project_minimal_with_list_2.json") + + # test that both lists have been correctly created + self.assertTrue(success1) + self.assertTrue(success2) + name2iri_names_1 = [str(m.path) for m in jsonpath_ng.ext.parse("$..* where id").find(name2iri_mapping1)] + name2iri_names_2 = [str(m.path) for m in jsonpath_ng.ext.parse("$..* where id").find(name2iri_mapping2)] + node_names_1 = [m.value for m in jsonpath_ng.ext.parse("$.project.lists[*]..name").find(test_project_minimal_with_list_1)] + node_names_2 = [m.value for m in jsonpath_ng.ext.parse("$.project.lists[*]..name").find(test_project_minimal_with_list_2)] + self.assertListEqual(name2iri_names_1, node_names_1) + self.assertListEqual(name2iri_names_2, node_names_2) + + + def test_validate_project(self) -> None: + self.assertTrue(validate_project(self.test_project_systematic_file)) + + + def test_create_project(self) -> None: + result = create_project( + input_file=self.test_project_systematic_file, + server=self.server, + user_mail=self.user, + password="test", + verbose=True, + dump=False + ) + self.assertTrue(result) + + + def test_get_ontology(self) -> None: with open(self.test_project_systematic_file) as f: 
- project_json_str = f.read() - test_project = json.loads(project_json_str) + project_expected = json.load(f) - get_ontology(project_identifier='tp', - outfile='testdata/tmp/_test-project-systematic.json', + get_ontology(project_identifier="tp", + outfile="testdata/tmp/_test-project-systematic.json", server=self.server, user=self.user, - password='test', + password="test", verbose=True) - with open('testdata/tmp/_test-project-systematic.json') as f: - project_json_str = f.read() - test_project_out = json.loads(project_json_str) + with open("testdata/tmp/_test-project-systematic.json") as f: + project_received = json.load(f) - self.assertEqual(test_project['project']['shortcode'], test_project_out['project']['shortcode']) - self.assertEqual(test_project['project']['shortname'], test_project_out['project']['shortname']) - self.assertEqual(test_project['project']['longname'], test_project_out['project']['longname']) - self.assertEqual(test_project['project']['descriptions'], test_project_out['project']['descriptions']) - self.assertEqual(sorted(test_project['project']['keywords']), sorted(test_project_out['project']['keywords'])) + self.assertEqual(project_expected["project"]["shortcode"], project_received["project"]["shortcode"]) + self.assertEqual(project_expected["project"]["shortname"], project_received["project"]["shortname"]) + self.assertEqual(project_expected["project"]["longname"], project_received["project"]["longname"]) + self.assertEqual(project_expected["project"]["descriptions"], project_received["project"]["descriptions"]) + self.assertEqual(sorted(project_expected["project"]["keywords"]), sorted(project_received["project"]["keywords"])) - groups_expected = test_project['project']['groups'] - groups_received = test_project_out['project']['groups'] + groups_expected = project_expected["project"]["groups"] + groups_received = project_received["project"]["groups"] group_names_expected = [] group_descriptions_expected = [] group_selfjoin_expected = [] @@ 
-88,8 +166,8 @@ def test_get(self) -> None: self.assertEqual(group_selfjoin_expected, group_selfjoin_received) self.assertEqual(group_status_expected, group_status_received) - users_expected = test_project['project']['users'] - users_received = test_project_out['project']['users'] + users_expected = project_expected["project"]["users"] + users_received = project_received["project"]["users"] user_username_expected = [] user_email_expected = [] user_given_name_expected = [] @@ -121,8 +199,8 @@ def test_get(self) -> None: self.assertEqual(sorted(user_family_name_expected), sorted(user_family_name_received)) self.assertEqual(sorted(user_lang_expected), sorted(user_lang_received)) - ontos_expected = test_project['project']['ontologies'] - ontos_received = test_project_out['project']['ontologies'] + ontos_expected = project_expected["project"]["ontologies"] + ontos_received = project_received["project"]["ontologies"] onto_names_expected = [] onto_labels_expected = [] onto_names_received = [] @@ -136,57 +214,30 @@ def test_get(self) -> None: self.assertEqual(sorted(onto_names_expected), sorted(onto_names_received)) self.assertEqual(sorted(onto_labels_expected), sorted(onto_labels_received)) - lists = test_project['project']['lists'] - test_list: dict[str, str] = next((l for l in lists if l['name'] == 'testlist'), {}) - not_used_list: dict[str, str] = next((l for l in lists if l['name'] == 'notUsedList'), {}) - excel_list: dict[str, str] = next((l for l in lists if l['name'] == 'my-list-from-excel'), {}) + lists = project_expected["project"]["lists"] + test_list: dict[str, str] = next((l for l in lists if l["name"] == "testlist"), {}) + not_used_list: dict[str, str] = next((l for l in lists if l["name"] == "notUsedList"), {}) + excel_list: dict[str, str] = next((l for l in lists if l["name"] == "my-list-from-excel"), {}) - lists_out = test_project_out['project']['lists'] - test_list_out: dict[str, str] = next((l for l in lists_out if l['name'] == 'testlist'), {}) - 
not_used_list_out: dict[str, str] = next((l for l in lists_out if l['name'] == 'notUsedList'), {}) - excel_list_out: dict[str, str] = next((l for l in lists_out if l['name'] == 'my-list-from-excel'), {}) + lists_out = project_received["project"]["lists"] + test_list_out: dict[str, str] = next((l for l in lists_out if l["name"] == "testlist"), {}) + not_used_list_out: dict[str, str] = next((l for l in lists_out if l["name"] == "notUsedList"), {}) + excel_list_out: dict[str, str] = next((l for l in lists_out if l["name"] == "my-list-from-excel"), {}) - self.assertEqual(test_list.get('labels'), test_list_out.get('labels')) - self.assertEqual(test_list.get('comments'), test_list_out.get('comments')) - self.assertEqual(test_list.get('nodes'), test_list_out.get('nodes')) + self.assertEqual(test_list.get("labels"), test_list_out.get("labels")) + self.assertEqual(test_list.get("comments"), test_list_out.get("comments")) + self.assertEqual(test_list.get("nodes"), test_list_out.get("nodes")) - self.assertEqual(not_used_list.get('labels'), not_used_list_out.get('labels')) - self.assertEqual(not_used_list.get('comments'), not_used_list_out.get('comments')) - self.assertEqual(not_used_list.get('nodes'), not_used_list_out.get('nodes')) + self.assertEqual(not_used_list.get("labels"), not_used_list_out.get("labels")) + self.assertEqual(not_used_list.get("comments"), not_used_list_out.get("comments")) + self.assertEqual(not_used_list.get("nodes"), not_used_list_out.get("nodes")) - self.assertEqual(excel_list.get('comments'), excel_list_out.get('comments')) + self.assertEqual(excel_list.get("comments"), excel_list_out.get("comments")) - def test_excel_to_json_list(self) -> None: - excel_to_json_lists.excel2lists(excelfolder='testdata/lists_multilingual', - path_to_output_file='testdata/tmp/_lists-out.json') - def test_excel_to_json_resources(self) -> None: - excel2resources(excelfile='testdata/Resources.xlsx', - path_to_output_file='testdata/tmp/_out_resources.json') - - def 
test_excel_to_json_properties(self) -> None: - excel2properties(excelfile='testdata/Properties.xlsx', - path_to_output_file='testdata/tmp/_out_properties.json') + def test_validate_xml_against_schema(self) -> None: + self.assertTrue(validate_xml_against_schema(self.test_data_systematic_file)) - def test_create_project(self) -> None: - result1 = create_project( - input_file=self.test_project_systematic_file, - server=self.server, - user_mail=self.user, - password='test', - verbose=True, - dump=False - ) - result2 = create_project( - input_file=self.test_project_minimal_file, - server=self.server, - user_mail=self.user, - password='test', - verbose=True, - dump=False - ) - self.assertTrue(result1) - self.assertTrue(result2) def test_xml_upload(self) -> None: result_minimal = xml_upload( @@ -211,19 +262,19 @@ def test_xml_upload(self) -> None: incremental=False) self.assertTrue(result_systematic) - mapping_file = '' - for mapping in [x for x in os.scandir('.') if x.name.startswith('id2iri_test-data-systematic_mapping_')]: + mapping_file = "" + for mapping in [x for x in os.scandir(".") if x.name.startswith("id2iri_test-data-systematic_mapping_")]: delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(mapping.stat().st_mtime_ns / 1000000000) if delta.seconds < 15: mapping_file = mapping.name - self.assertNotEqual(mapping_file, '') + self.assertNotEqual(mapping_file, "") - id2iri_replaced_xml_filename = 'testdata/tmp/_test-id2iri-replaced.xml' - id_to_iri(xml_file='testdata/test-id2iri-data.xml', + id2iri_replaced_xml_filename = "testdata/tmp/_test-id2iri-replaced.xml" + id_to_iri(xml_file="testdata/test-id2iri-data.xml", json_file=mapping_file, out_file=id2iri_replaced_xml_filename, verbose=True) - self.assertEqual(os.path.isfile(id2iri_replaced_xml_filename), True) + self.assertTrue(os.path.isfile(id2iri_replaced_xml_filename)) result_replaced = xml_upload( input_file=id2iri_replaced_xml_filename, @@ -236,9 +287,48 @@ def test_xml_upload(self) -> None: 
incremental=True ) self.assertTrue(result_replaced) - self.assertTrue(all([not f.name.startswith('stashed_text_properties_') for f in os.scandir('.')])) - self.assertTrue(all([not f.name.startswith('stashed_resptr_properties_') for f in os.scandir('.')])) + self.assertTrue(all([not f.name.startswith("stashed_text_properties_") for f in os.scandir(".")])) + self.assertTrue(all([not f.name.startswith("stashed_resptr_properties_") for f in os.scandir(".")])) + + os.remove(mapping_file) + os.remove(id2iri_replaced_xml_filename) + + + def test_excel_to_json_list(self) -> None: + excel2lists(excelfolder="testdata/lists_multilingual", + path_to_output_file="testdata/tmp/_lists-out.json") + self.assertTrue(os.path.isfile("testdata/tmp/_lists-out.json")) + os.remove("testdata/tmp/_lists-out.json") + + + def test_excel_to_json_resources(self) -> None: + excel2resources(excelfile="testdata/Resources.xlsx", + path_to_output_file="testdata/tmp/_out_resources.json") + self.assertTrue(os.path.isfile("testdata/tmp/_out_resources.json")) + os.remove("testdata/tmp/_out_resources.json") + + + def test_excel_to_json_properties(self) -> None: + excel2properties(excelfile="testdata/Properties.xlsx", + path_to_output_file="testdata/tmp/_out_properties.json") + self.assertTrue(os.path.isfile("testdata/tmp/_out_properties.json")) + os.remove("testdata/tmp/_out_properties.json") + + + def test_id_to_iri(self) -> None: + id_to_iri(xml_file="testdata/test-id2iri-data.xml", + json_file="testdata/test-id2iri-mapping.json", + out_file="testdata/tmp/test-id2iri-out.xml", + verbose=True) + self.assertTrue(os.path.isfile("testdata/tmp/test-id2iri-out.xml")) + os.remove("testdata/tmp/test-id2iri-out.xml") + + + def test_excel2xml(self) -> None: + excel2xml("testdata/excel2xml-testdata.xlsx", "1234", "excel2xml-output") + self.assertTrue(os.path.isfile("excel2xml-output-data.xml")) + os.remove("excel2xml-output-data.xml") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff 
--git a/test/unittests/test_create_ontology.py b/test/unittests/test_create_ontology.py deleted file mode 100644 index cf51911ab..000000000 --- a/test/unittests/test_create_ontology.py +++ /dev/null @@ -1,59 +0,0 @@ -"""unit tests for ontology creation""" -import unittest -import json -from typing import Any - -from knora.dsplib.utils.onto_create_ontology import _sort_resources, _sort_prop_classes -from knora.dsplib.utils.onto_validate import _collect_link_properties, _identify_problematic_cardinalities - - -class TestOntoCreation(unittest.TestCase): - with open('testdata/test-project-systematic.json', 'r') as json_file: - project: dict[str, Any] = json.load(json_file) - ontology: dict[str, Any] = project['project']['ontologies'][0] - with open('testdata/test-project-circular-ontology.json', 'r') as json_file: - circular_onto: dict[str, Any] = json.load(json_file) - - def test_sort_resources(self) -> None: - """ - The 'resources' section of an onto is a list of dictionaries. The safest way to test - that the sorted list contains the same dicts is to sort both lists according to the - same criteria, and then test for list equality. - """ - onto_name: str = self.ontology['name'] - unsorted_resources: list[dict[str, Any]] = self.ontology['resources'] - sorted_resources = _sort_resources(unsorted_resources, onto_name) - - unsorted_resources = sorted(unsorted_resources, key=lambda a: str(a['name'])) - sorted_resources = sorted(sorted_resources, key=lambda a: str(a['name'])) - - self.assertListEqual(unsorted_resources, sorted_resources) - - def test_sort_prop_classes(self) -> None: - """ - The 'properties' section of an onto is a list of dictionaries. The safest way to test - that the sorted list contains the same dicts is to sort both lists according to the - same criteria, and then test for list equality. 
- """ - onto_name: str = self.ontology['name'] - unsorted_props: list[dict[str, Any]] = self.ontology['resources'] - sorted_props = _sort_prop_classes(unsorted_props, onto_name) - - unsorted_props = sorted(unsorted_props, key=lambda a: str(a['name'])) - sorted_props = sorted(sorted_props, key=lambda a: str(a['name'])) - - self.assertListEqual(unsorted_props, sorted_props) - - - def test_circular_references_in_onto(self) -> None: - link_properties = _collect_link_properties(self.circular_onto) - errors = _identify_problematic_cardinalities(self.circular_onto, link_properties) - expected_errors = [ - ('testonto:AnyResource', 'testonto:linkToTestThing1'), - ('testonto:TestThing3', 'testonto:linkToResource') - ] - self.assertListEqual(sorted(errors), sorted(expected_errors)) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/unittests/test_create_project.py b/test/unittests/test_create_project.py new file mode 100644 index 000000000..eeeeb6f7a --- /dev/null +++ b/test/unittests/test_create_project.py @@ -0,0 +1,75 @@ +"""unit tests for ontology creation""" +import unittest +import json +from typing import Any + +from knora.dsplib.models.helpers import BaseError +from knora.dsplib.utils.onto_create_ontology import _sort_resources, _sort_prop_classes +from knora.dsplib.utils.onto_validate import _collect_link_properties, _identify_problematic_cardinalities, validate_project + + +class TestProjectCreation(unittest.TestCase): + test_project_systematic_file = "testdata/test-project-systematic.json" + with open(test_project_systematic_file, "r") as json_file: + test_project_systematic: dict[str, Any] = json.load(json_file) + test_project_systematic_ontology: dict[str, Any] = test_project_systematic["project"]["ontologies"][0] + test_project_circular_ontology_file = "testdata/test-project-circular-ontology.json" + with open(test_project_circular_ontology_file, "r") as json_file: + test_project_circular_ontology: dict[str, Any] = json.load(json_file) + + + def 
test_sort_resources(self) -> None: + """ + The "resources" section of an onto is a list of dictionaries. The safest way to test + that the sorted list contains the same dicts is to sort both lists according to the + same criteria, and then test for list equality. + """ + onto_name: str = self.test_project_systematic_ontology["name"] + unsorted_resources: list[dict[str, Any]] = self.test_project_systematic_ontology["resources"] + sorted_resources = _sort_resources(unsorted_resources, onto_name) + + unsorted_resources = sorted(unsorted_resources, key=lambda a: str(a["name"])) + sorted_resources = sorted(sorted_resources, key=lambda a: str(a["name"])) + + self.assertListEqual(unsorted_resources, sorted_resources) + + + def test_sort_prop_classes(self) -> None: + """ + The "properties" section of an onto is a list of dictionaries. The safest way to test + that the sorted list contains the same dicts is to sort both lists according to the + same criteria, and then test for list equality. + """ + onto_name: str = self.test_project_systematic_ontology["name"] + unsorted_props: list[dict[str, Any]] = self.test_project_systematic_ontology["resources"] + sorted_props = _sort_prop_classes(unsorted_props, onto_name) + + unsorted_props = sorted(unsorted_props, key=lambda a: str(a["name"])) + sorted_props = sorted(sorted_props, key=lambda a: str(a["name"])) + + self.assertListEqual(unsorted_props, sorted_props) + + + def test_validate_project(self) -> None: + self.assertTrue(validate_project(self.test_project_systematic_file)) + self.assertTrue(validate_project(self.test_project_systematic)) + with self.assertRaisesRegex(BaseError, r"Input 'fantasy.xyz' is neither a file path nor a JSON object."): + validate_project("fantasy.xyz") + with self.assertRaisesRegex(BaseError, r"validation error: 'hasColor' does not match"): + validate_project("testdata/test-project-invalid-super-property.json") + with self.assertRaisesRegex(BaseError, r"ERROR: Your ontology contains properties 
derived from 'hasLinkTo'"): + validate_project(self.test_project_circular_ontology) + + + def test_circular_references_in_onto(self) -> None: + link_properties = _collect_link_properties(self.test_project_circular_ontology) + errors = _identify_problematic_cardinalities(self.test_project_circular_ontology, link_properties) + expected_errors = [ + ("testonto:AnyResource", "testonto:linkToTestThing1"), + ("testonto:TestThing3", "testonto:linkToResource") + ] + self.assertListEqual(sorted(errors), sorted(expected_errors)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/unittests/test_shared_methods.py b/test/unittests/test_shared.py similarity index 76% rename from test/unittests/test_shared_methods.py rename to test/unittests/test_shared.py index 26ce3f220..c78fa8a84 100644 --- a/test/unittests/test_shared_methods.py +++ b/test/unittests/test_shared.py @@ -1,11 +1,23 @@ import unittest import pandas as pd import numpy as np + +from knora.dsplib.models.helpers import BaseError from knora.dsplib.utils import shared from knora.dsplib.models.propertyelement import PropertyElement -class TestSharedMethods(unittest.TestCase): +class TestShared(unittest.TestCase): + def test_validate_xml_against_schema(self) -> None: + self.assertTrue(shared.validate_xml_against_schema("testdata/test-data-systematic.xml")) + self.assertTrue(shared.validate_xml_against_schema("testdata/test-data-minimal.xml")) + with self.assertRaisesRegex( + BaseError, + "Line 12: Element '{https://dasch.swiss/schema}resource', attribute 'invalidtag': " + "The attribute 'invalidtag' is not allowed" + ): + shared.validate_xml_against_schema("testdata/test-data-invalid-resource-tag.xml") + def test_prepare_dataframe(self) -> None: original_df = pd.DataFrame({ " TitLE of Column 1 ": ["1", " 0-1 ", "1-n ", pd.NA, " ", " ", "", " 0-n ", np.nan], diff --git a/testdata/test-data-invalid-resource-tag.xml b/testdata/test-data-invalid-resource-tag.xml new file mode 100644 index 
000000000..fbbb8f6d8 --- /dev/null +++ b/testdata/test-data-invalid-resource-tag.xml @@ -0,0 +1,15 @@ + + + + + + + + diff --git a/testdata/test-project-circular-ontology.json b/testdata/test-project-circular-ontology.json index 6a26d5940..0cbfb9626 100644 --- a/testdata/test-project-circular-ontology.json +++ b/testdata/test-project-circular-ontology.json @@ -1,4 +1,5 @@ { + "$schema": "../knora/dsplib/schemas/ontology.json", "project": { "shortcode": "1233", "shortname": "test", @@ -6,6 +7,10 @@ "descriptions": { "en": "test" }, + "keywords": [ + "test", + "testing" + ], "ontologies": [ { "name": "testonto", @@ -37,23 +42,22 @@ { "name": "linkToTestThing2", "super": [ - "isAnnotationOf", - "foaf:fantasy" + "hasLinkTo" ], "object": ":TestThing2", "labels": { - "en": "has region" + "en": "has a link to TestThing2" }, "gui_element": "Searchbox" }, { "name": "linkToTestThing3", "super": [ - "isRegionOf" + "hasLinkTo" ], "object": ":TestThing3", "labels": { - "en": "has region" + "en": "has a link to TestThing3" }, "gui_element": "Searchbox" } diff --git a/testdata/test-project-invalid-super-property.json b/testdata/test-project-invalid-super-property.json new file mode 100644 index 000000000..4434ca2a3 --- /dev/null +++ b/testdata/test-project-invalid-super-property.json @@ -0,0 +1,48 @@ +{ + "$schema": "../knora/dsplib/schemas/ontology.json", + "project": { + "shortcode": "4124", + "shortname": "minimal-tp", + "longname": "minimal test project", + "descriptions": { + "en": "A minimal test project" + }, + "keywords": [ + "minimal" + ], + "ontologies": [ + { + "name": "minimalOnto", + "label": "minimal onto", + "properties": [ + { + "name": "hasText", + "super": [ + "hasColor" + ], + "object": "TextValue", + "labels": { + "en": "Text" + }, + "gui_element": "SimpleText" + } + ], + "resources": [ + { + "name": "minimalResource", + "super": "Resource", + "labels": { + "en": "Minimal Resource" + }, + "cardinalities": [ + { + "propname": ":hasText", + "cardinality": "0-n" 
+ } + ] + } + ] + } + ] + } +}