From ab0e3b26936e53df49a3fa67e3564b7c16f168e0 Mon Sep 17 00:00:00 2001 From: Johannes Nussbaum <39048939+jnussbaum@users.noreply.github.com> Date: Tue, 9 Aug 2022 11:03:54 +0200 Subject: [PATCH] fix: catch network interruptions during onto creation (DEV-1073) (#210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - If there are temporary problems during the ontology creation process (e.g. a short connectivity interruption), dsp-tools will retry several times instead of giving up immediately. - The console output during the onto creation is now much more informative. - No more "lists.json" file is written during each onto creation. - No more doubling: the onto isn't validated two times any more, before being created on the DSP server, and the Excel file references in the “lists” section aren’t expanded two times any more. - Thorough code refactoring of the onto creation process. --- docs/dsp-tools-create.md | 35 +- docs/dsp-tools-usage.md | 8 +- knora/dsp_tools.py | 36 +- knora/dsplib/models/group.py | 23 +- knora/dsplib/models/project.py | 3 +- knora/dsplib/utils/excel_to_json_lists.py | 250 +++-- knora/dsplib/utils/expand_all_lists.py | 39 - knora/dsplib/utils/onto_create_lists.py | 244 ++--- knora/dsplib/utils/onto_create_ontology.py | 900 +++++++++--------- knora/dsplib/utils/onto_validate.py | 91 +- knora/dsplib/utils/shared_methods.py | 84 ++ knora/dsplib/utils/xml_upload.py | 184 ++-- test/e2e/test_tools.py | 89 +- test/unittests/test_create_ontology.py | 16 +- test/unittests/test_id_to_iri.py | 4 +- test/unittests/test_xmlupload.py | 4 +- ...on => test-project-circular-ontology.json} | 0 testdata/test-project-minimal.json | 48 + ...onto.json => test-project-systematic.json} | 40 +- 19 files changed, 1124 insertions(+), 974 deletions(-) delete mode 100644 knora/dsplib/utils/expand_all_lists.py create mode 100644 knora/dsplib/utils/shared_methods.py rename testdata/{circular-onto.json => 
test-project-circular-ontology.json} (100%) create mode 100644 testdata/test-project-minimal.json rename testdata/{test-onto.json => test-project-systematic.json} (96%) diff --git a/docs/dsp-tools-create.md b/docs/dsp-tools-create.md index 527898b84..3e20116f8 100644 --- a/docs/dsp-tools-create.md +++ b/docs/dsp-tools-create.md @@ -363,12 +363,12 @@ The groups that were created here are then available in the XML file in the A group definition has the following elements: -- _name_: name of the group, mandatory -- _descriptions_: description of the group with language tags in the form `"descriptions": {"": "", ...}` ( - currently "en", "de", "fr", "it", and "rm" are supported), mandatory -- _selfjoin_: true if users are allowed to join the group themselves, false if an administrator has to add the users, - optional -- _status_: true if the group is active, false if the group is inactive, optional +- _name_ (mandatory): name of the group +- _descriptions_ (mandatory): description of the group with language tags in the form `"descriptions": {"": + "", ...}` (currently "en", "de", "fr", "it", and "rm" are supported) +- _selfjoin_ (optional): true if users are allowed to join the group themselves, false (default) if an administrator has + to add them +- _status_ (optional): true (default) if the group is active, false if the group is inactive Example: @@ -399,13 +399,16 @@ This object contains user definitions. A user has the following elements: - _familyName_: surname of the user - _password_: password of the user - _lang_: the default language of the user: "en", "de", "fr", "it" (optional, default: "en") -- _groups_: List of groups the user belongs to. The name of the group has to be provided with the project's shortname, - p.ex. "shortname:editors". The project defined in the same ontology file has no name, so only ":editors" is required - if the user belongs to the group "editors". (optional) -- _projects_: List of projects the user belongs to. 
The project name has to be followed by a ":" and either "member" - or "admin". This indicates if the new user has admin rights in the given project or is an ordinary - user. `myproject:admin` would add the user as admin to the project "myproject". The given project defined in the same - ontology file has no name, so only ":admin"or ":member" is required. (optional) +- _groups_ (optional): List of groups the user belongs to. The group names must be provided in one of the following forms: + - `other_project_shortname:groupname` + - `:groupname` (for groups defined in the current ontology file) + - `SystemAdmin` (the most powerful group, built into DSP) +- _projects_ (optional): List of projects the user belongs to. The project name has to be followed by a `:` and either + `member` or `admin`. This indicates if the new user has admin rights in the given project or is an ordinary + user. `myproject:admin` would add the user as admin to the project `myproject`. The project defined in the same + ontology file can be omitted, so only `:admin` or `:member` is enough. + - If _projects_ is omitted, the user won't be part of any project. 
+- _status_ (optional): true (default) if the user is active, false if the user is deleted/inactive Example: @@ -420,12 +423,14 @@ Example: "password": "biz1234", "lang": "en", "groups": [ - ":biz-editors" + ":biz-editors", + "SystemAdmin" ], "projects": [ ":admin", "otherProject:member" - ] + ], + "status": true } ] } diff --git a/docs/dsp-tools-usage.md b/docs/dsp-tools-usage.md index eee904b77..1e64c519e 100644 --- a/docs/dsp-tools-usage.md +++ b/docs/dsp-tools-usage.md @@ -35,10 +35,10 @@ The following options are available: - `-s` | `--server` _server_: URL of the DSP server (default: 0.0.0.0:3333) - `-u` | `--user` _username_: username used for authentication with the DSP API (default: root@example.com) - `-p` | `--password` _password_: password used for authentication with the DSP API (default: test) -- `-V` | `--validate`: If set, only the validation of the JSON file is performed. -- `-l` | `--lists`: If set, only the lists are created using a [simplified schema](./dsp-tools-create.md#lists). Please - note that in this case the project must already exist. -- `-v` | `--verbose`: If set, some information about the progress is printed to the console. +- `-V` | `--validate-only`: If set, only the validation of the JSON file is performed. +- `-l` | `--lists-only`: If set, only the lists are created. Please note that in this case the project must already exist. +- `-v` | `--verbose`: If set, more information about the progress is printed to the console. +- `-d` | `--dump`: If set, dump test files for DSP-API requests. The command is used to read the definition of a data model (provided in a JSON file) and create it on the DSP server. 
The following example shows how to load the ontology defined in `data_model_definition.json` onto the DSP diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py index a24ed3f0e..0c74db520 100644 --- a/knora/dsp_tools.py +++ b/knora/dsp_tools.py @@ -11,9 +11,9 @@ from knora.dsplib.utils.excel_to_json_resources import resources_excel2json from knora.dsplib.utils.id_to_iri import id_to_iri from knora.dsplib.utils.onto_create_lists import create_lists -from knora.dsplib.utils.onto_create_ontology import create_ontology +from knora.dsplib.utils.onto_create_ontology import create_project from knora.dsplib.utils.onto_get import get_ontology -from knora.dsplib.utils.onto_validate import validate_ontology +from knora.dsplib.utils.onto_validate import validate_project from knora.dsplib.utils.xml_upload import xml_upload @@ -55,12 +55,10 @@ def program(user_args: list[str]) -> None: parser_create.add_argument('-s', '--server', type=str, default=default_localhost, help=url_text) parser_create.add_argument('-u', '--user', default=default_user, help=username_text) parser_create.add_argument('-p', '--password', default=default_pw, help=password_text) - parser_create.add_argument('-V', '--validate', action='store_true', + parser_create.add_argument('-V', '--validate-only', action='store_true', help='Do only validation of JSON, no upload of the ' 'ontology') - parser_create.add_argument('-L', '--listfile', type=str, default='lists-only.json', - help='Name of list node informationfile') - parser_create.add_argument('-l', '--lists', action='store_true', help='Upload only the list(s)') + parser_create.add_argument('-l', '--lists-only', action='store_true', help='Upload only the list(s)') parser_create.add_argument('-v', '--verbose', action='store_true', help=verbose_text) parser_create.add_argument('-d', '--dump', action='store_true', help='dump test files for DSP-API requests') parser_create.add_argument('datamodelfile', help='path to data model file') @@ -142,31 +140,27 @@ def 
program(user_args: list[str]) -> None: exit(0) if args.action == 'create': - if args.lists: - if args.validate: + if args.lists_only: + if args.validate_only: validate_list_with_schema(args.datamodelfile) else: create_lists(input_file=args.datamodelfile, - lists_file=args.listfile, server=args.server, user=args.user, password=args.password, - verbose=args.verbose, dump=args.dump) else: - if args.validate: - if validate_ontology(args.datamodelfile): + if args.validate_only: + if validate_project(args.datamodelfile): + print('Data model is syntactically correct and passed validation.') exit(0) - else: - exit(1) else: - create_ontology(input_file=args.datamodelfile, - lists_file=args.listfile, - server=args.server, - user_mail=args.user, - password=args.password, - verbose=args.verbose, - dump=args.dump if args.dump else False) + create_project(input_file=args.datamodelfile, + server=args.server, + user_mail=args.user, + password=args.password, + verbose=args.verbose, + dump=args.dump if args.dump else False) elif args.action == 'get': get_ontology(project_identifier=args.project, outfile=args.datamodelfile, diff --git a/knora/dsplib/models/group.py b/knora/dsplib/models/group.py index 148eb2aa7..a33af7e22 100644 --- a/knora/dsplib/models/group.py +++ b/knora/dsplib/models/group.py @@ -240,23 +240,18 @@ def delete(self): return Group.fromJsonObj(self._con, result['group']) @staticmethod - def getAllGroups(con: Connection) -> Optional[list[Group]]: - try: - result = con.get(Group.ROUTE) - return [Group.fromJsonObj(con, group_item) for group_item in result["groups"]] - except BaseError: - # return None if no groups are found or an error happened - return None + def getAllGroups(con: Connection) -> list[Group]: + result = con.get(Group.ROUTE) + return [Group.fromJsonObj(con, group_item) for group_item in result["groups"]] @staticmethod def getAllGroupsForProject(con: Connection, proj_shortcode: str) -> Optional[list[Group]]: - all_groups: Optional[list[Group]] = 
Group.getAllGroups(con) - if all_groups: - project_groups = [] - for group in all_groups: - if group.project == "http://rdfh.ch/projects/" + proj_shortcode: - project_groups.append(group) - return project_groups + all_groups = Group.getAllGroups(con) + project_groups = [] + for group in all_groups: + if group.project == "http://rdfh.ch/projects/" + proj_shortcode: + project_groups.append(group) + return project_groups def createDefinitionFileObj(self): group = { diff --git a/knora/dsplib/models/project.py b/knora/dsplib/models/project.py index 58ac7bc14..e4308627d 100644 --- a/knora/dsplib/models/project.py +++ b/knora/dsplib/models/project.py @@ -446,7 +446,8 @@ def read(self) -> Project: if result is not None: return Project.fromJsonObj(self._con, result['project']) else: - return None # Todo: throw exception + raise BaseError(f"ERROR: Could not read project '{self.shortname}' ({self.shortcode}) with IRI {self._id} " + f"from DSP server.") def update(self) -> Project: """ diff --git a/knora/dsplib/utils/excel_to_json_lists.py b/knora/dsplib/utils/excel_to_json_lists.py index 90f3b48f9..bb70e6e42 100644 --- a/knora/dsplib/utils/excel_to_json_lists.py +++ b/knora/dsplib/utils/excel_to_json_lists.py @@ -1,29 +1,76 @@ """This module handles all the operations which are used for the creation of JSON lists from Excel files.""" -import csv import glob import json import os import re import unicodedata -from typing import Any, Union, Optional +from typing import Any, Union, Optional, Tuple import jsonschema from openpyxl import load_workbook from openpyxl.cell import Cell from openpyxl.worksheet.worksheet import Worksheet +from knora.dsplib.models.helpers import BaseError -# Global variables used to ensure that there are no duplicate node names list_of_lists_of_previous_cell_values: list[list[str]] = [] +"""Module level variable used to ensure that there are no duplicate node names""" + list_of_previous_node_names: list[str] = [] +"""Module level variable used to ensure 
that there are no duplicate node names""" + + +def expand_lists_from_excel( + lists_section: list[dict[str, Union[str, dict[str, Any]]]] +) -> Tuple[list[dict[str, Any]], bool]: + """ + Checks if the "lists" section of a JSON project file contains references to Excel files. Expands all Excel files to + JSON, and returns the expanded "lists" section. If there are no references to Excel files, the "lists" section is + returned as is. + Returns a tuple consisting of the expanded "lists" section and a boolean value: True if everything went smoothly, + False if one of the lists couldn't be expanded correctly. + Args: + lists_section: the "lists" section of a parsed JSON project file. If this is an empty list, an empty list will be returned. -def get_values_from_excel( + Returns: + the same "lists" section, but without references to Excel files + True if all lists could be expanded correctly, False if a problem occurred + """ + overall_success = True + new_lists = [] + for _list in lists_section: + if "folder" not in _list["nodes"]: + # this list is a JSON list: return it as it is + new_lists.append(_list) + else: + # this is a reference to a folder with Excel files + prepared_rootnode, excel_file_names = _prepare_list_creation( + excelfolder=_list["nodes"]["folder"], + listname=_list["name"], + comments=_list["comments"] + ) + try: + finished_list = _make_json_list_from_excel(prepared_rootnode, excel_file_names, verbose=False) + new_lists.append(finished_list) + print(f"\tThe list '{_list['name']}' contains a reference to the folder '{_list['nodes']['folder']}'. 
" + f"The Excel files therein will be temporarily expanded into the 'lists' section of your project.") + except BaseError as err: + print(f"\tWARNING: The list '{_list['name']}' contains a reference to the folder " + f"'{_list['nodes']['folder']}', but a problem occurred while trying to expand the Excel files " + f"therein into the 'lists' section of your project: {err.message}") + overall_success = False + + return new_lists, overall_success + + +def _get_values_from_excel( excelfiles: dict[str, Worksheet], base_file: dict[str, Worksheet], parentnode: dict[str, Any], row: int, col: int, - preval: list[str] + preval: list[str], + verbose: bool = False ) -> tuple[int, dict[str, Any]]: """ This function calls itself recursively to go through the Excel files. It extracts the cell values and creates @@ -36,6 +83,7 @@ def get_values_from_excel( row: The index of the current row of the Excel sheet col: The index of the current column of the Excel sheet preval: List of previous values, needed to check the consistency of the list hierarchy + verbose: verbose switch Returns: int: Row index for the next loop (current row index minus 1) @@ -47,12 +95,11 @@ def get_values_from_excel( cell: Cell = base_file_ws.cell(column=col, row=row) for excelfile in excelfiles.values(): - if any((not excelfile['A1'].value, excelfile['B1'].value)): - print(f'Inconsistency in Excel list: The first row must consist of exactly one value, in cell A1. ' - f'All other cells of row 1 must be empty.\nInstead, found the following:\n' - f'Cell A1: "{excelfile["A1"].value}"\n' - f'Cell B1: "{excelfile["B1"].value}"') - quit() + if any((not excelfile["A1"].value, excelfile["B1"].value)): + raise BaseError(f"ERROR: Inconsistency in Excel list: The first row must consist of exactly one value, in " + f"cell A1. 
All other cells of row 1 must be empty.\nInstead, found the following:\n" + f"Cell A1: '{excelfile['A1'].value}'\n" + f"Cell B1: '{excelfile['B1'].value}'") if col > 1: # append the cell value of the parent node (which is one value to the left of the current cell) to the list of @@ -63,58 +110,57 @@ def get_values_from_excel( # check if all predecessors in row (values to the left) are consistent with the values in preval list for idx, val in enumerate(preval[:-1]): if val != base_file_ws.cell(column=idx+1, row=row).value.strip(): - print(f'Inconsistency in Excel list: {val} not equal to ' - f'{base_file_ws.cell(column=idx+1, row=row).value.strip()}') - quit() + raise BaseError(f"ERROR: Inconsistency in Excel list: {val} not equal to " + f"{base_file_ws.cell(column=idx+1, row=row).value.strip()}") # loop through the row until the last (furthest right) value is found if base_file_ws.cell(column=col+1, row=row).value: - row, _ = get_values_from_excel( + row, _ = _get_values_from_excel( excelfiles=excelfiles, base_file=base_file, parentnode=currentnode, col=col+1, row=row, - preval=preval + preval=preval, + verbose=verbose ) # if value was last in row (no further values to the right), it's a node, continue here else: - # check if there are duplicate nodes (i.e. identical rows), quit the program if so + # check if there are duplicate nodes (i.e. identical rows), raise a BaseError if so new_check_list = preval.copy() new_check_list.append(cell.value.strip()) list_of_lists_of_previous_cell_values.append(new_check_list) - if contains_duplicates(list_of_lists_of_previous_cell_values): - print(f'There is at least one duplicate node in the list. Found duplicate in column {cell.column}, ' - f'row {cell.row}:\n"{cell.value.strip()}"') - quit(1) + if _contains_duplicates(list_of_lists_of_previous_cell_values): + raise BaseError(f"ERROR: There is at least one duplicate node in the list. 
Found duplicate in column " + f"{cell.column}, row {cell.row}:\n'{cell.value.strip()}'") # create a simplified version of the cell value and use it as name of the node - nodename = simplify_name(cell.value.strip()) + nodename = _simplify_name(cell.value.strip()) list_of_previous_node_names.append(nodename) # append a number (p.ex. node-name-2) if there are list nodes with identical names - if contains_duplicates(list_of_previous_node_names): + if _contains_duplicates(list_of_previous_node_names): n = list_of_previous_node_names.count(nodename) if n > 1: - nodename = nodename + '-' + str(n) + nodename = nodename + "-" + str(n) # read label values from the other Excel files (other languages) labels_dict: dict[str, str] = {} for other_lang, ws_other_lang in excelfiles.items(): cell_value = ws_other_lang.cell(column=col, row=row).value if not(isinstance(cell_value, str) and len(cell_value) > 0): - print(f'ERROR: Malformed Excel file: The Excel file with the language code "{other_lang}" ' - f'should have a value in row {row}, column {col}') - quit(1) + raise BaseError(f"ERROR: Malformed Excel file: The Excel file with the language code " + f"'{other_lang}' should have a value in row {row}, column {col}") else: labels_dict[other_lang] = cell_value.strip() # create current node from extracted cell values and append it to the nodes list - currentnode = {'name': nodename, 'labels': labels_dict} + currentnode = {"name": nodename, "labels": labels_dict} nodes.append(currentnode) - print(f'Added list node: {cell.value.strip()} ({nodename})') + if verbose: + print(f"Added list node: {cell.value.strip()} ({nodename})") # go one row down and repeat loop if there is a value row += 1 @@ -124,19 +170,20 @@ def get_values_from_excel( preval.pop() # add the new nodes to the parentnode - parentnode['nodes'] = nodes + parentnode["nodes"] = nodes return row - 1, parentnode -def make_json_list_from_excel(rootnode: dict[str, Any], excelfile_names: list[str]) -> dict[str, Any]: +def 
_make_json_list_from_excel(rootnode: dict[str, Any], excel_file_names: list[str], verbose=False) -> dict[str, Any]: """ - Reads Excel files and makes a JSON list file from them. The JSON can then be used in an ontology that - is uploaded to the DaSCH Service Platform. + Reads Excel files and transforms them into a dict structure which can later be inserted into the "lists" array + of a JSON project file. Args: - rootnode: The root node of the JSON list - excelfile_names: A list with all the Excel files to be processed + rootnode: The root element on top of which the dict structure is going to be built + excel_file_names: Excel files to be processed + verbose: verbose switch Returns: The finished list as a dict @@ -148,36 +195,37 @@ def make_json_list_from_excel(rootnode: dict[str, Any], excelfile_names: list[st # Check if English file is available and take it as base file. Take last one from list of Excel files if English # is not available. The node names are later derived from the labels of the base file. 
base_file: dict[str, Worksheet] = dict() - for filename in excelfile_names: - if '_en.xlsx' in os.path.basename(filename): - lang = 'en' + for filename in excel_file_names: + if "_en.xlsx" in os.path.basename(filename): + lang = "en" ws = load_workbook(filename, read_only=True).worksheets[0] base_file = {lang: ws} if len(base_file) == 0: - file = excelfile_names[-1] - lang = os.path.splitext(file)[0].split('_')[-1] + file = excel_file_names[-1] + lang = os.path.splitext(file)[0].split("_")[-1] ws = load_workbook(file, read_only=True).worksheets[0] base_file = {lang: ws} excelfiles: dict[str, Worksheet] = {} - for f in excelfile_names: - lang = os.path.splitext(f)[0].split('_')[-1] + for f in excel_file_names: + lang = os.path.splitext(f)[0].split("_")[-1] ws = load_workbook(f, read_only=True).worksheets[0] excelfiles[lang] = ws - _, finished_list = get_values_from_excel( + _, finished_list = _get_values_from_excel( excelfiles=excelfiles, base_file=base_file, parentnode=rootnode, row=startrow, col=startcol, - preval=[] + preval=[], + verbose=verbose ) return finished_list -def contains_duplicates(list_to_check: list[Any]) -> bool: +def _contains_duplicates(list_to_check: list[Any]) -> bool: """ Checks if the given list contains any duplicate items. @@ -196,7 +244,7 @@ def contains_duplicates(list_to_check: list[Any]) -> bool: return has_duplicates -def simplify_name(value: str) -> str: +def _simplify_name(value: str) -> str: """ Simplifies a given value in order to use it as node name @@ -209,59 +257,58 @@ def simplify_name(value: str) -> str: simplified_value = str(value).lower() # normalize characters (p.ex. 
ä becomes a) - simplified_value = unicodedata.normalize('NFKD', simplified_value) + simplified_value = unicodedata.normalize("NFKD", simplified_value) # replace forward slash and whitespace with a dash - simplified_value = re.sub('[/\\s]+', '-', simplified_value) + simplified_value = re.sub("[/\\s]+", "-", simplified_value) # delete all characters which are not letters, numbers or dashes - simplified_value = re.sub('[^A-Za-z0-9\\-]+', '', simplified_value) + simplified_value = re.sub("[^A-Za-z0-9\\-]+", "", simplified_value) return simplified_value -def make_root_node_from_args( - excelfiles: list[str], +def _make_root_node_from_args( + excel_file_names: list[str], listname_from_args: Optional[str], comments: dict[str, str] ) -> dict[str, Any]: """ - Creates the root node for the JSON list + Creates the root node for the JSON list. Its name is chosen from one of the following sources: + 1. the user input, + 2. the English file name, + 3. the last file name + The labels are created from the file names. There are no subnodes appended yet. This method creates only the root + node. 
Args: - excelfiles: List of Excel files (names) to be checked - listname_from_args: Listname from arguments provided by the user via the command line - comments: Comments provided by the ontology + excel_file_names: List of the Excel file names + listname_from_args: name of the list provided by the user via the command line + comments: Comments provided by the JSON project file Returns: - dict: The root node of the list as dictionary (JSON) + The root node of the list as dictionary """ - listname_from_lang_code = {} - listname_en: str = '' - lang_specific_listname: str = '' + labels_dict = {} + listname_en = "" - for filename in excelfiles: + for filename in excel_file_names: basename = os.path.basename(filename) - lang_specific_listname, lang_code = os.path.splitext(basename)[0].rsplit('_', 1) + lang_specific_listname, lang_code = os.path.splitext(basename)[0].rsplit("_", 1) - if lang_code not in ['en', 'de', 'fr', 'it', 'rm']: - print(f'Invalid language code "{lang_code}" is used. Only en, de, fr, it, and rm are accepted.') - quit() + if lang_code not in ["en", "de", "fr", "it", "rm"]: + raise BaseError(f"ERROR: Invalid language code '{lang_code}' is used. 
Only en, de, fr, it, and rm are " + f"accepted.") - listname_from_lang_code[lang_code] = lang_specific_listname + labels_dict[lang_code] = lang_specific_listname - if '_en.xlsx' in filename: + if lang_code == "en": listname_en = lang_specific_listname # the listname is taken from the following sources, with descending priority - if listname_from_args: - listname = listname_from_args - elif listname_en: - listname = listname_en - else: - listname = lang_specific_listname + listname = listname_from_args or listname_en or lang_specific_listname - rootnode = {'name': listname, 'labels': listname_from_lang_code, 'comments': comments} + rootnode = {"name": listname, "labels": labels_dict, "comments": comments} return rootnode @@ -277,7 +324,7 @@ def validate_list_with_schema(json_list: str) -> bool: True if the list passed validation, False otherwise """ current_dir = os.path.dirname(os.path.realpath(__file__)) - with open(os.path.join(current_dir, '../schemas/lists-only.json')) as schema: + with open(os.path.join(current_dir, "../schemas/lists-only.json")) as schema: list_schema = json.load(schema) try: @@ -285,17 +332,18 @@ def validate_list_with_schema(json_list: str) -> bool: except jsonschema.exceptions.ValidationError as err: print(err) return False - print('List passed schema validation.') + print("List passed schema validation.") return True -def prepare_list_creation( +def _prepare_list_creation( excelfolder: str, listname: Optional[str], comments: dict[str, Any] ) -> tuple[dict[str, Any], list[str]]: """ - Creates the list from Excel files that can be used to build a JSON list. Then, creates the root node for the JSON list. + This method extracts the names of the Excel files that are in the folder, and creates the root node based on these + Excel file names. 
Args: excelfolder: path to the folder containing the Excel file(s) @@ -303,40 +351,32 @@ def prepare_list_creation( comments: comments for the list to be created Returns: - rootnode: The rootnode of the list as a dictionary - excel_files: list of the Excel files to process + rootnode: the empty root node of the list, as a dict + excel_files: list of the Excel file names to process """ - # reset the global variables before list creation starts + # reset the global variables global list_of_previous_node_names global list_of_lists_of_previous_cell_values - list_of_previous_node_names = [] list_of_lists_of_previous_cell_values = [] - # check if the given folder parameter is actually a folder if not os.path.isdir(excelfolder): - print(excelfolder, ' is not a directory.') - exit(1) - - # create a list with all excel files from the path provided by the user - excel_files = [filename for filename in glob.iglob(f'{excelfolder}/*.xlsx') - if not os.path.basename(filename).startswith('~$') - and os.path.isfile(filename)] + raise BaseError(f"ERROR: {excelfolder} is not a directory.") - # print the files that can be used - print('Found the following files:') - for file in excel_files: - print(file) + excel_file_names = [filename for filename in glob.iglob(f"{excelfolder}/*.xlsx") + if not os.path.basename(filename).startswith("~$") + and os.path.isfile(filename)] - # create root node of list - rootnode = make_root_node_from_args(excel_files, listname, comments) + # create the root node of the list, based on the Excel files and the user input + rootnode = _make_root_node_from_args(excel_file_names, listname, comments) - return rootnode, excel_files + return rootnode, excel_file_names def list_excel2json(listname: Union[str, None], excelfolder: str, outfile: str) -> None: """ - Takes the arguments from the command line, checks folder and files and starts the process of list creation. 
+ This method writes a JSON file with a dict structure that can later be inserted into the "lists" array of a JSON + project file. Args: listname: name of the list to be created, file name is taken if omitted @@ -346,16 +386,18 @@ def list_excel2json(listname: Union[str, None], excelfolder: str, outfile: str) Return: None """ - # get the Excel files from the folder and create the rootnode of the list - rootnode, excel_files = prepare_list_creation(excelfolder, listname, comments={}) + # retrieve the Excel files from the folder and create the root node of the list + rootnode, excel_file_names = _prepare_list_creation(excelfolder, listname, comments={}) + print("The following Excel files will be processed:") + [print(f" - {filename}") for filename in excel_file_names] - # create the list from the Excel files - finished_list = make_json_list_from_excel(rootnode, excel_files) + # create the entire list from the Excel files + finished_list = _make_json_list_from_excel(rootnode, excel_file_names, verbose=True) # validate created list with schema if validate_list_with_schema(json.loads(json.dumps(finished_list, indent=4))): - with open(outfile, 'w', encoding='utf-8') as fp: + with open(outfile, "w", encoding="utf-8") as fp: json.dump(finished_list, fp, indent=4, sort_keys=False, ensure_ascii=False) - print('List was created successfully and written to file:', outfile) + print("List was created successfully and written to file:", outfile) else: - print('List is not valid according to schema.') + print("List is not valid according to schema.") diff --git a/knora/dsplib/utils/expand_all_lists.py b/knora/dsplib/utils/expand_all_lists.py deleted file mode 100644 index d3fa78d67..000000000 --- a/knora/dsplib/utils/expand_all_lists.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Any, Union, Optional, cast - -from knora.dsplib.utils.excel_to_json_lists import make_json_list_from_excel, prepare_list_creation - - -def expand_lists_from_excel( - data_model: dict[str, dict[str, 
list[dict[str, Union[str, dict[str, Any]]]]]] -) -> list[dict[str, Any]]: - """ - Gets all list definitions from a data model and expands them to JSON if they are only referenced via an Excel file - - Args: - data_model: The data model (JSON) the lists are read from - - Returns: - A list of all expanded lists. It can be added to the root node of an ontology as lists section. - """ - - if 'project' not in data_model or 'lists' not in data_model['project']: - return [] - - lists = data_model['project']['lists'] - new_lists = [] - - for rootnode in lists: - nodes = rootnode.get('nodes') - listname = cast(Optional[str], rootnode.get('name')) - comments = cast(dict[str, Any], rootnode.get('comments')) - # check if the folder parameter is used - if isinstance(nodes, dict) and 'folder' in nodes and comments: - # get the Excel files from the folder and create the rootnode of the list - prepared_rootnode, excel_files = prepare_list_creation(nodes['folder'], listname, comments) - # create the list from the Excel files - finished_list = make_json_list_from_excel(prepared_rootnode, excel_files) - new_lists.append(finished_list) - else: - new_lists.append(rootnode) - - return new_lists diff --git a/knora/dsplib/utils/onto_create_lists.py b/knora/dsplib/utils/onto_create_lists.py index 3b0eeede8..9a3340510 100644 --- a/knora/dsplib/utils/onto_create_lists.py +++ b/knora/dsplib/utils/onto_create_lists.py @@ -1,141 +1,161 @@ import json -from typing import Any, Optional +from typing import Any, Optional, Tuple -from .expand_all_lists import expand_lists_from_excel -from .onto_validate import validate_ontology +from .excel_to_json_lists import expand_lists_from_excel +from .onto_validate import validate_project from ..models.connection import Connection from ..models.helpers import BaseError from ..models.listnode import ListNode from ..models.project import Project +from .shared_methods import login, try_network_action -def create_list_node(con: Connection, project: Project, 
parent_node: ListNode, nodes: list[dict[str, Any]]) -> list[ - dict[str, Any]]: +def _create_list_node( + con: Connection, + project: Project, + node: dict[str, Any], + parent_node: Optional[ListNode] = None +) -> Tuple[dict[str, Any], bool]: """ - Creates the list on the DSP server + Creates a list node on the DSP server, recursively scanning through all its subnodes, creating them as well. + Returns a tuple consisting of a dict and a bool. The dict contains the IRIs of the created list nodes. The bool + indicates if all nodes could be created or not. Args: - con: Connection to the DSP server - project: Project which the lists should be added to - parent_node: Root node of the list - nodes: List of nodes the list is made of + con: connection to the DSP server + project: project that holds the list where this node should be added to + node: the node to be created + parent_node: parent node of the node to be created (optional) Returns: - The list of all nodes with their names and respective IDs + dict of the form ``{nodename: {"id": node IRI, "nodes": {...}}}`` with the created list nodes, nested according to their hierarchy structure + True if all nodes could be created, False if any node could not be created """ - nodelist = [] - for node in nodes: - new_node = None - try: - new_node = ListNode(con=con, project=project, label=node["labels"], comments=node.get("comments"), - name=node["name"], - parent=parent_node).create() - except BaseError as err: - print(f"ERROR while trying to create list node '{node['name']}'. 
The error message was {err.message}") - exit(1) - if new_node: - # if node has child nodes, call the method recursively - if node.get("nodes"): - subnode_list = create_list_node(con, project, new_node, node["nodes"]) - nodelist.append({new_node.name: {"id": new_node.id, "nodes": subnode_list}}) - else: - nodelist.append({new_node.name: {"id": new_node.id}}) - return nodelist - - -def create_lists(input_file: str, lists_file: str, server: str, user: str, password: str, verbose: bool, - dump: bool = False) -> dict[str, Any]: + new_node = ListNode( + con=con, + project=project, + label=node["labels"], + comments=node.get("comments"), + name=node["name"], + parent=parent_node + ) + try: + new_node = try_network_action( + action=lambda: new_node.create(), + failure_msg=f"ERROR while trying to create list node '{node['name']}'." + ) + except BaseError as err: + print(err.message) + return {}, False + + # if node has child nodes, call the method recursively + if node.get("nodes"): + overall_success = True + subnode_list = [] + for subnode in node["nodes"]: + created_subnode, success = _create_list_node(con=con, project=project, node=subnode, parent_node=new_node) + subnode_list.append(created_subnode) + if not success: + overall_success = False + return {new_node.name: {"id": new_node.id, "nodes": subnode_list}}, overall_success + else: + return {new_node.name: {"id": new_node.id}}, True + + +def create_lists( + server: str, + user: str, + password: str, + project_definition: Optional[dict[str, Any]] = None, + input_file: Optional[str] = None, + dump: bool = False +) -> Tuple[dict[str, Any], bool]: """ - Handles the list creation + This method uploads the "lists" section of a JSON project definition file to a DSP server. If the JSON project file + is still unparsed, this method parses it, expands the Excel sheets that are referenced, and validates it. + The "lists" section of the parsed project definition is then uploaded to the DSP server. 
If a list with the same + name is already existing in this project on the DSP server, this list is skipped. + Returns a tuple consisting of a dict and a bool. The dict contains the IRIs of the created list nodes. If there are + no lists in the project definition, an empty dictionary is returned. The bool indicates if everything went smoothly + during the process. If a warning or error occurred (e.g. one of the lists already exists, or one of the nodes could + not be created), it is False. Args: - input_file: Path to the json data model file - lists_file: Output file for the list node names and their respective IRI + input_file: path to the JSON project file (will be validated, and Excel file references will be expanded) + project_definition: parsed JSON project file (must be valid, and the Excel file references must be expanded already) server: URL of the DSP server - user: Username (e-mail) for the DSP server, has to have the permissions to create an ontology + user: Username (e-mail) for the DSP server, must have the permissions to create a project password: Password of the user - verbose: Verbose output if True - dump: dumps the request as JSON (used for testing) + dump: if True, the request is dumped as JSON (used for testing) Returns: - list_root_nodes: Dictionary of node names and their respective IRI + dict of the form ``{nodename: {"id": IRI, "nodes": {...}}}`` with the created list nodes, nested according to their hierarchy structure + True if everything went smoothly, False if a warning or error occurred """ - # read the ontology from the input file - with open(input_file) as f: - onto_json_str = f.read() - - data_model = json.loads(onto_json_str) - - # expand all lists referenced in the list section of the data model - new_lists = expand_lists_from_excel(data_model) - - # add the newly created lists from Excel to the ontology - data_model["project"]["lists"] = new_lists - - # validate the ontology - if validate_ontology(data_model): - pass + 
overall_success = True + + if project_definition and not input_file: + # the "lists_to_create" can directly be taken from the "lists" section + lists_to_create = project_definition["project"].get("lists") + if not lists_to_create: + return {}, True + elif input_file and not project_definition: + # the file must be parsed, potential Excel file references expanded, and then, the file must be validated. + # Only then, the "lists_to_create" are in a safe state + with open(input_file) as f: + project_json_str = f.read() + project_definition = json.loads(project_json_str) + if not project_definition["project"].get("lists"): + return {}, True + lists_to_create, success = expand_lists_from_excel(project_definition["project"]["lists"]) + if not success: + overall_success = False + project_definition["project"]["lists"] = lists_to_create + if validate_project(project_definition, expand_lists=False): + print('JSON project file is syntactically correct and passed validation.') else: - exit(1) - - # Connect to the DaSCH Service Platform API - con = Connection(server) - con.login(user, password) + raise BaseError(f"ERROR: Must provide either project_definition or input_file. It's not possible to provide " + f"neither of them or both of them.") + # connect to the DSP server + con = login(server, user, password) if dump: con.start_logging() - # get the project which must exist - project: Optional[Project] = None + # retrieve the project + project_local = Project(con=con, shortcode=project_definition["project"]["shortcode"]) + project_remote = try_network_action( + action=lambda: project_local.read(), + failure_msg="ERROR while trying to create the lists: Project couldn't be read from the DSP server." 
+ ) + + # retrieve existing lists try: - project = Project(con=con, shortcode=data_model["project"]["shortcode"]).read() + existing_lists: list[ListNode] = try_network_action( + action=lambda: ListNode.getAllLists(con=con, project_iri=project_remote.id), + failure_msg="WARNING: Unable to retrieve existing lists on DSP server. Cannot check if your lists are " + "already existing." + ) except BaseError as err: - print( - f"ERROR while trying to create the lists. Referenced project couldn't be read from the server. The error message was: {err.message}") - exit(1) - - # create the lists - if verbose: - print("Create lists...") - - all_lists: Optional[list[ListNode]] = ListNode.getAllLists(con, project.id) - lists = data_model["project"].get("lists") - list_root_nodes = {} - if lists: - for rootnode in lists: - rootnode_name = rootnode["name"] - # check if list already exists - list_exists: bool = False - if all_lists: - for list_item in all_lists: - if list_item.project == project.id and list_item.name == rootnode_name: - list_root_nodes[list_item.name] = {"id": list_item.id, "nodes": rootnode["nodes"]} - list_exists = True - if list_exists: - print(f"WARN List '{rootnode_name}' already exists. Skipping...") - continue - - if verbose: - print(f"Creating list '{rootnode_name}'.") - - root_list_node = None - try: - root_list_node = ListNode(con=con, project=project, label=rootnode["labels"], - comments=rootnode.get("comments"), - name=rootnode_name).create() - except BaseError as err: - print(f"ERROR while trying to create the list '{rootnode_name}'. The error message was: {err.message}") - exit(1) - except Exception as exception: - print( - f"ERROR while trying to create the list '{rootnode_name}'. 
The error message was: {exception}") - exit(1) - if rootnode.get("nodes") and root_list_node and project: - list_nodes = create_list_node(con, project, root_list_node, rootnode["nodes"]) - list_root_nodes[rootnode["name"]] = {"id": root_list_node.id, "nodes": list_nodes} - - with open(lists_file, "w", encoding="utf-8") as fp: - json.dump(list_root_nodes, fp, indent=4, sort_keys=True) - print(f"The IRI for the created nodes can be found in '{lists_file}'.") - - return list_root_nodes + print(err.message) + existing_lists = [] + overall_success = False + + # create new lists + current_project_lists = {} + for new_list in lists_to_create: + # if list exists already, add it to "current_project_lists" (for later usage), then skip it + existing_list = [x for x in existing_lists if x.project == project_remote.id and x.name == new_list["name"]] + if existing_list: + current_project_lists[existing_list[0].name] = {"id": existing_list[0].id, "nodes": new_list["nodes"]} + print(f"\tWARNING: List '{new_list['name']}' already exists on the DSP server. 
Skipping...") + overall_success = False + continue + + created_list, success = _create_list_node(con=con, project=project_remote, node=new_list) + current_project_lists.update(created_list) + if not success: + overall_success = False + print(f"\tCreated list '{new_list['name']}'.") + + return current_project_lists, overall_success diff --git a/knora/dsplib/utils/onto_create_ontology.py b/knora/dsplib/utils/onto_create_ontology.py index 72a418272..fb1e0d715 100644 --- a/knora/dsplib/utils/onto_create_ontology.py +++ b/knora/dsplib/utils/onto_create_ontology.py @@ -2,7 +2,7 @@ of the project, the creation of groups, users, lists, resource classes, properties and cardinalities.""" import json import re -from typing import Union, Optional, Any +from typing import Any, cast, Tuple from knora.dsplib.models.connection import Connection from knora.dsplib.models.group import Group @@ -13,319 +13,316 @@ from knora.dsplib.models.propertyclass import PropertyClass from knora.dsplib.models.resourceclass import ResourceClass from knora.dsplib.models.user import User -from knora.dsplib.utils.expand_all_lists import expand_lists_from_excel +from knora.dsplib.utils.excel_to_json_lists import expand_lists_from_excel from knora.dsplib.utils.onto_create_lists import create_lists -from knora.dsplib.utils.onto_validate import validate_ontology +from knora.dsplib.utils.onto_validate import validate_project +from knora.dsplib.utils.shared_methods import login, try_network_action -def login(server: str, user: str, password: str) -> Connection: +def _create_project(con: Connection, project_definition: dict[str, Any]) -> Project: """ - Logs in and returns the active connection - - Args: - server: URL of the DSP server to connect to - user: Username (e-mail) - password: Password of the user - - Return: - Connection instance - """ - con = Connection(server) - con.login(user, password) - return con - - -def create_project(con: Connection, data_model: dict[str, Any], verbose: bool) -> Project: - 
""" - Creates a project on a DSP server with information provided in the data_model + Creates a project on a DSP server from a parsed JSON project file. Raises a BaseError if it is not + possible to create the project. Args: con: connection instance to connect to the DSP server - data_model: The data model as JSON - verbose: Prints out more information if set to True + project_definition: a parsed JSON project file Returns: created project """ - project_shortcode = data_model["project"]["shortcode"] - project_shortname = data_model["project"]["shortname"] - - try: - project = Project(con=con, - shortcode=data_model["project"]["shortcode"], - shortname=data_model["project"]["shortname"], - longname=data_model["project"]["longname"], - description=LangString(data_model["project"].get("descriptions")), - keywords=set(data_model["project"].get("keywords")), - selfjoin=False, - status=True).create() - if verbose: - print(f"Created project '{project_shortname}' ({project_shortcode}).") - return project - except BaseError as err: - print( - f"ERROR while trying to create project '{project_shortname}' ({project_shortcode}). The error message was: {err.message}") - exit(1) - except Exception as exception: - print( - f"ERROR while trying to create project '{project_shortname}' ({project_shortcode}). 
The error message was: {exception}") - exit(1) - - -def update_project(project: Project, data_model: dict[str, Any], verbose: bool) -> Project: + project_local = Project( + con=con, + shortcode=project_definition["project"]["shortcode"], + shortname=project_definition["project"]["shortname"], + longname=project_definition["project"]["longname"], + description=LangString(project_definition["project"].get("descriptions")), + keywords=set(project_definition["project"].get("keywords")), + selfjoin=False, + status=True + ) + project_remote: Project = try_network_action( + action=lambda: project_local.create(), + failure_msg=f"ERROR: Cannot create project '{project_definition['project']['shortname']}' " + f"({project_definition['project']['shortcode']}) on DSP server." + ) + return project_remote + + +def _update_project(project: Project, project_definition: dict[str, Any], verbose: bool) -> Project: """ - Updates a project on a DSP server with information provided in the data_model + Updates a project on a DSP server from a JSON project file. Only the longname, description and keywords will be + updated. Raises a BaseError if the project cannot be updated. 
Args: - project: The project to be updated - data_model: The data model as JSON - verbose: Prints out more information if set to True + project: the project to be updated (must exist on the DSP server) + project_definition: a parsed JSON project file with the same shortname and shortcode than the existing project Returns: updated project """ - project_shortcode = data_model["project"]["shortcode"] - project_shortname = data_model["project"]["shortname"] - project.longname = data_model["project"]["longname"] - project.description = data_model["project"].get("descriptions") - project.keywords = data_model["project"].get("keywords") - try: - updated_project = project.update() - if verbose: - print(f"Updated project '{project_shortname}' ({project_shortcode}).") - return updated_project - except BaseError as err: - print( - f"ERROR while trying to update project '{project_shortname}' ({project_shortcode}). The error message was: {err.message}") - exit(1) - except Exception as exception: - print( - f"ERROR while trying to update project '{project_shortname}' ({project_shortcode}). The error message was: {exception}") - exit(1) + project.longname = project_definition["project"]["longname"] + project.description = project_definition["project"].get("descriptions") + project.keywords = project_definition["project"].get("keywords") + project_remote: Project = try_network_action( + action=lambda: project.update(), + failure_msg=f"WARNING: Could not update project '{project_definition['project']['shortname']}' " + f"({project_definition['project']['shortcode']})." 
+ ) + if verbose: + print(f"\tUpdated project '{project_definition['project']['shortname']}' ({project_definition['project']['shortcode']}).") + return project_remote -def create_groups(con: Connection, groups: list[dict[str, str]], project: Project, verbose: bool) -> dict[str, Group]: +def _create_groups(con: Connection, groups: list[dict[str, str]], project: Project) -> Tuple[dict[str, Group], bool]: """ - Creates group(s) on a DSP server from a list of group definitions + Creates groups on a DSP server from the "groups" section of a JSON project file. If a group cannot be created, it is + skipped and a warning is printed, but such a group will still be part of the returned dict. + Returns a tuple consisting of a dict and a bool. The dict contains the groups that have successfully been created + (or already exist). The bool indicates if everything went smoothly during the process. If a warning or error + occurred, it is False. Args: con: connection instance to connect to the DSP server - groups: List of definitions of the groups (JSON) to be created - project: Project the group(s) should be added to - verbose: Prints out more information if set to True + groups: "groups" section of a parsed JSON project file + project: Project the group(s) should be added to (must exist on DSP server) Returns: - Dict with group names and groups + dict of the form ``{group name: group object}`` with the groups that have successfully been created (or already exist). Empty dict if no group was created. 
+ True if everything went smoothly, False if a warning or error occurred """ - new_groups: dict[str, Group] = {} + overall_success = True + current_project_groups: dict[str, Group] = {} + try: + remote_groups: list[Group] = try_network_action( + action=lambda: Group.getAllGroupsForProject(con=con, proj_shortcode=project.shortcode), + failure_msg="WARNING: Unable to check if group names are already existing on DSP server, because it is " + "not possible to retrieve the remote groups from DSP server." + ) + except BaseError as err: + print(err.message) + remote_groups = [] + overall_success = False + for group in groups: group_name = group["name"] - # check if the group already exists, skip if so - all_groups: Optional[list[Group]] = Group.getAllGroups(con) - - group_exists: bool = False - if all_groups: - group_exists = any(group_item.name == group_name for group_item in all_groups) - - if group_exists: - print(f"WARN Group name '{group_name}' already in use. Skipping...") + # if the group already exists, add it to "current_project_groups" (for later usage), then skip it + remotely_existing_group = [g for g in remote_groups if g.name == group_name] + if remotely_existing_group: + current_project_groups[group_name] = remotely_existing_group[0] + print(f"\tWARNING: Group name '{group_name}' already exists on the DSP server. 
Skipping...") + overall_success = False continue - # check if status is defined, set default value if not - group_status: Optional[str] = group.get("status") - group_status_bool = True - if isinstance(group_status, str): - group_status_bool = json.loads(group_status.lower()) # lower() converts string to boolean - - # check if selfjoin is defined, set default value if not - group_selfjoin: Optional[str] = group.get("selfjoin") - group_selfjoin_bool = False - if isinstance(group_selfjoin, str): - group_selfjoin_bool = json.loads(group_selfjoin.lower()) # lower() converts string to boolean - # create the group + group_local = Group( + con=con, + name=group_name, + descriptions=LangString(group["descriptions"]), + project=project, + status=group.get("status", True), + selfjoin=group.get("selfjoin", False) + ) try: - new_group: Group = Group(con=con, - name=group_name, - descriptions=LangString(group["descriptions"]), - project=project, - status=group_status_bool, - selfjoin=group_selfjoin_bool).create() - if verbose: - print(f"Created group '{group_name}'.") - if new_group.name: - new_groups[new_group.name] = new_group - + group_remote: Group = try_network_action( + action=lambda: group_local.create(), + failure_msg=f"\tWARNING: Unable to create group '{group_name}'." + ) except BaseError as err: - print(f"ERROR while trying to create group '{group_name}'. The error message was: {err.message}") - exit(1) - except Exception as exception: - print(f"ERROR while trying to create group '{group_name}'. 
The error message was: {exception}") - exit(1) - return new_groups + print(err.message) + overall_success = False + continue + + current_project_groups[group_remote.name] = group_remote + print(f"\tCreated group '{group_name}'.") + return current_project_groups, overall_success -def create_users(con: Connection, users: list[dict[str, str]], groups: dict[str, Group], project: Project, - verbose: bool) -> None: + +def _create_users( + con: Connection, + users: list[dict[str, str]], + current_project_groups: dict[str, Group], + current_project: Project, + verbose: bool +) -> bool: """ - Creates user(s) on a DSP server from a list of user definitions + Creates users on a DSP server from the "users" section of a JSON project file. If a user cannot be created, a + warning is printed and the user is skipped. Args: con: connection instance to connect to the DSP server - users: List of definitions of the users (JSON) to be created - groups: Dict with group definitions defined inside the actual ontology - project: Project the user(s) should be added to + users: "users" section of a parsed JSON project file + current_project_groups: groups defined in the current project (dict of the form {group name - group object}). These groups must exist on the DSP server. + current_project: "project" object of the current project (must exist on DSP server) verbose: Prints more information if set to True Returns: - None + True if all users could be created without any problems. False if a warning/error occurred. """ + overall_success = True for user in users: username = user["username"] - # check if the user already exists, skip if so - maybe_user: Optional[User] = None + # skip the user if he already exists try: - maybe_user = User(con, email=user["email"]).read() + try_network_action( + action=lambda: User(con, email=user["email"]).read(), + failure_msg="" + ) + print(f"\tWARNING: User '{username}' already exists on the DSP server. 
Skipping...") + overall_success = False + continue except BaseError: pass - if maybe_user: - print(f"WARN User '{username}' already exists. Skipping...") - continue - sysadmin = False + # if "groups" is provided, add user to the group(s) group_ids: set[str] = set() - project_info: dict[str, bool] = {} + sysadmin = False + remote_groups: list[Group] = [] + for full_group_name in user.get("groups", []): + # full_group_name has the form '[project_shortname]:group_name' or 'SystemAdmin' + if ":" not in full_group_name and full_group_name != "SystemAdmin": + print(f"\tWARNING: User {username} cannot be added to group {full_group_name}, because such a " + f"group doesn't exist.") + overall_success = False + continue - # if "groups" is provided add user to the group(s) - user_groups = user.get("groups") - if user_groups: - all_groups: Optional[list[Group]] = Group.getAllGroups(con) - for full_group_name in user_groups: + if full_group_name == "SystemAdmin": + sysadmin = True if verbose: - print(f"Add user '{username}' to group '{full_group_name}'.") - # full_group_name has the form '[project_shortname]:group_name' or 'SystemAdmin' - # if project_shortname is omitted, the group belongs to the current project - tmp_group_name: Union[list[str], str] = full_group_name.split( - ":") if ":" in full_group_name else full_group_name - - if len(tmp_group_name) == 2: - project_shortname = tmp_group_name[0] - group_name = tmp_group_name[1] - - group: Optional[Group] = None - if project_shortname: # full_group_name refers to an already existing group on DSP - # check that group exists - if all_groups: - for g in all_groups: - if g.project == project.id and g.name == group_name: - group = g - else: - print(f"WARN '{group_name}' is referring to a group on DSP but no groups found.") - - else: # full_group_name refers to a group inside the same ontology - group = groups.get(group_name) - if group is None: - print(f"WARN Group '{group_name}' not found in actual ontology.") - else: - if 
isinstance(group.id, str): - group_ids.add(group.id) - elif tmp_group_name == "SystemAdmin": - sysadmin = True - else: - print(f"WARN Provided group '{full_group_name}' for user '{username}' is not valid. Skipping...") + print(f"\tAdded user '{username}' to group 'SystemAdmin'.") + continue - # if "projects" is provided, add user to the projects(s) - user_projects = user.get("projects") - if user_projects: - all_projects: list[Project] = project.getAllProjects(con) - for full_project_name in user_projects: - if verbose: - print(f"Add user '{username}' to project '{full_project_name}'.") - # full_project_name has the form '[project_name]:member' or '[project_name]:admin' - # if project_name is omitted, the user is added to the current project - tmp_group_name = full_project_name.split(":") - - if not len(tmp_group_name) == 2: - print( - f"WARN Provided project '{full_project_name}' for user '{username}' is not valid. Skipping...") + # all other cases (":" in full_group_name) + project_shortname, group_name = full_group_name.split(":") + if not project_shortname: + # full_group_name refers to a group inside the same project + if group_name not in current_project_groups: + print(f"\tWARNING: User {username} cannot be added to group {full_group_name}, because " + f"such a group doesn't exist.") + overall_success = False continue + group = current_project_groups[group_name] + else: + # full_group_name refers to an already existing group on DSP + try: + # "remote_groups" might be available from a previous loop cycle + remote_groups = remote_groups or try_network_action( + action=lambda: Group.getAllGroups(con=con), + failure_msg=f"\tWARNING: User '{username}' is referring to the group {full_group_name} that " + f"exists on the DSP server, but no groups could be retrieved from the DSP server." 
+ ) + except BaseError as err: + print(err.message) + overall_success = False + continue + existing_group = [g for g in remote_groups if g.project == current_project.id and g.name == group_name] + if not existing_group: + print(f"\tWARNING: User {username} cannot be added to group {full_group_name}, because " + f"such a group doesn't exist.") + overall_success = False + continue + group = existing_group[0] - project_name = tmp_group_name[0] - project_role = tmp_group_name[1] - - in_project: Optional[Project] = None + group_ids.add(group.id) + if verbose: + print(f"\tAdded user '{username}' to group '{full_group_name}'.") - if project_name: # project_name is provided - # check that project exists - for p in all_projects: - if p.shortname == project_name: - in_project = p + # if "projects" is provided, add user to the project(s) + project_info: dict[str, bool] = {} + remote_projects: list[Project] = [] + for full_project_name in user.get("projects", []): + # full_project_name has the form '[project_name]:member' or '[project_name]:admin' + if ":" not in full_project_name: + print(f"\tWARNING: Provided project '{full_project_name}' for user '{username}' is not valid. " + f"Skipping...") + overall_success = False + continue - else: # no project_name provided - in_project = project + project_name, project_role = full_project_name.split(":") + if not project_name: + # full_project_name refers to the current project + in_project = current_project + else: + # full_project_name refers to an already existing project on DSP + try: + # "remote_projects" might be available from a previous loop cycle + remote_projects = remote_projects or try_network_action( + action=lambda: current_project.getAllProjects(con=con), + failure_msg=f"\tWARNING: User '{username}' cannot be added to the projects {user['projects']} " + f"because the projects cannot be retrieved from the DSP server." 
+ ) + except BaseError as err: + print(err.message) + overall_success = False + continue + in_project_list = [p for p in remote_projects if p.shortname == project_name] + if not in_project_list: + print(f"\tWARNING: Provided project '{full_project_name}' for user '{username}' is not valid. " + f"Skipping...") + overall_success = False + continue + in_project = in_project_list[0] - if in_project and isinstance(in_project.id, str): - if project_role == "admin": - project_info[in_project.id] = True - else: - project_info[in_project.id] = False + project_info[in_project.id] = bool(project_role == "admin") + if verbose: + print(f"\tAdded user '{username}' as {project_role} to project '{in_project.shortname}'.") # create the user - user_status: Optional[str] = user.get("status") - user_status_bool = True - if isinstance(user_status, str): - user_status_bool = json.loads(user_status.lower()) # lower() converts string to boolean + user_local = User( + con=con, + username=user["username"], + email=user["email"], + givenName=user["givenName"], + familyName=user["familyName"], + password=user["password"], + status=user.get("status", True), + lang=user.get("lang", "en"), + sysadmin=sysadmin, + in_projects=project_info, + in_groups=group_ids + ) try: - User(con=con, - username=user["username"], - email=user["email"], - givenName=user["givenName"], - familyName=user["familyName"], - password=user["password"], - status=user_status_bool, - lang=user["lang"] if user.get("lang") else "en", - sysadmin=sysadmin, - in_projects=project_info, - in_groups=group_ids).create() - if verbose: - print(f"Created user {username}.") + try_network_action( + action=lambda: user_local.create(), + failure_msg=f"\tWARNING: Unable to create user '{username}'." + ) except BaseError as err: - print(f"ERROR while trying to create user '{username}'. The error message was: {err.message}") - exit(1) - except Exception as exception: - print(f"ERROR while trying to create user '{username}'. 
The error message was: {exception}") - exit(1) + print(err.message) + overall_success = False + continue + print(f"\tCreated user '{username}'.") + + return overall_success -def sort_resources(unsorted_resources: list[dict[str, Any]], onto_name: str) -> list[dict[str, Any]]: +def _sort_resources(unsorted_resources: list[dict[str, Any]], onto_name: str) -> list[dict[str, Any]]: """ - This method sorts the resource classes in an ontology according their inheritance order - (parent classes first). + This method sorts the resource classes in an ontology according to their inheritance order (parent classes first). Args: - unsorted_resources: list of resources from a JSON ontology definition + unsorted_resources: list of resources from a parsed JSON project file onto_name: name of the onto Returns: sorted list of resource classes """ - # do not modify the original unsorted_resources, which points to the original onto file + # do not modify the original unsorted_resources, which points to the original JSON project file resources_to_sort = unsorted_resources.copy() sorted_resources: list[dict[str, Any]] = list() ok_resource_names: list[str] = list() while len(resources_to_sort) > 0: - # inside the for loop, resources_to_sort is modified, so a copy must be made - # to iterate over + # inside the for loop, resources_to_sort is modified, so a copy must be made to iterate over for res in resources_to_sort.copy(): res_name = f'{onto_name}:{res["name"]}' parent_classes = res['super'] if isinstance(parent_classes, str): parent_classes = [parent_classes] parent_classes = [re.sub(r'^:([^:]+)$', f'{onto_name}:\\1', elem) for elem in parent_classes] - parent_classes_ok = [not parent.startswith(onto_name) or parent in ok_resource_names for parent in parent_classes] + parent_classes_ok = [not p.startswith(onto_name) or p in ok_resource_names for p in parent_classes] if all(parent_classes_ok): sorted_resources.append(res) ok_resource_names.append(res_name) @@ -333,33 +330,32 @@ def 
sort_resources(unsorted_resources: list[dict[str, Any]], onto_name: str) -> return sorted_resources -def sort_prop_classes(unsorted_prop_classes: list[dict[str, Any]], onto_name: str) -> list[dict[str, Any]]: +def _sort_prop_classes(unsorted_prop_classes: list[dict[str, Any]], onto_name: str) -> list[dict[str, Any]]: """ In case of inheritance, parent properties must be uploaded before their children. This method sorts the properties. Args: - unsorted_prop_classes: list of properties from a JSON ontology definition + unsorted_prop_classes: list of properties from a parsed JSON project file onto_name: name of the onto Returns: sorted list of properties """ - # do not modify the original unsorted_prop_classes, which points to the original onto file + # do not modify the original unsorted_prop_classes, which points to the original JSON project file prop_classes_to_sort = unsorted_prop_classes.copy() sorted_prop_classes: list[dict[str, Any]] = list() ok_propclass_names: list[str] = list() while len(prop_classes_to_sort) > 0: - # inside the for loop, resources_to_sort is modified, so a copy must be made - # to iterate over + # inside the for loop, resources_to_sort is modified, so a copy must be made to iterate over for prop in prop_classes_to_sort.copy(): prop_name = f'{onto_name}:{prop["name"]}' parent_classes = prop.get('super', 'hasValue') if isinstance(parent_classes, str): parent_classes = [parent_classes] parent_classes = [re.sub(r'^:([^:]+)$', f'{onto_name}:\\1', elem) for elem in parent_classes] - parent_classes_ok = [not parent.startswith(onto_name) or parent in ok_propclass_names for parent in parent_classes] + parent_classes_ok = [not p.startswith(onto_name) or p in ok_propclass_names for p in parent_classes] if all(parent_classes_ok): sorted_prop_classes.append(prop) ok_propclass_names.append(prop_name) @@ -367,244 +363,246 @@ def sort_prop_classes(unsorted_prop_classes: list[dict[str, Any]], onto_name: st return sorted_prop_classes -def 
create_ontology(input_file: str, - lists_file: str, - server: str, - user_mail: str, - password: str, - verbose: bool, - dump: bool) -> None: +def create_project( + input_file: str, + server: str, + user_mail: str, + password: str, + verbose: bool, + dump: bool +) -> bool: """ - Creates the ontology and all its parts from a JSON input file on a DSP server + Creates a project from a JSON project file on a DSP server. A project must contain at least one ontology, and it may + contain lists, users, and groups. + Returns True if everything went smoothly during the process, False if a warning or error occurred. Args: - input_file: The input JSON file from which the ontology and its parts should be created - lists_file: The file which the list output (list node ID) is written to - server: The DSP server which the ontology should be created on - user_mail: The user (e-mail) which the ontology should be created with (requesting user) - password: The password for the user (requesting user) - verbose: Prints more information if set to True - dump: Dumps test files (JSON) for DSP API requests if set to True + input_file: the path to the JSON file from which the project and its parts should be created + server: the URL of the DSP server on which the project should be created + user_mail: a username (e-mail) who has the permission to create a project + password: the user's password + verbose: prints more information if set to True + dump: dumps test files (JSON) for DSP API requests if set to True Returns: - None + True if everything went smoothly, False if a warning or error occurred """ knora_api_prefix = "knora-api:" + overall_success = True + success = True - # read the ontology from the input file + # create project + ################ with open(input_file) as f: - onto_json_str = f.read() - - data_model = json.loads(onto_json_str) + project_json_str = f.read() + project_definition = json.loads(project_json_str) + print(f"Create project 
'{project_definition['project']['shortname']}' " + f"({project_definition['project']['shortcode']})...") - # expand all lists referenced in the list section of the data model and add them to the ontology - data_model["project"]["lists"] = expand_lists_from_excel(data_model) + # expand all lists referenced in the "lists" section of the project, and add them to the project + new_lists, success = expand_lists_from_excel(project_definition["project"].get("lists", [])) + if new_lists: + project_definition["project"]["lists"] = new_lists + if not success: + overall_success = False - # validate the ontology - if not validate_ontology(data_model): - exit(1) + if validate_project(project_definition, expand_lists=False): + print('\tJSON project file is syntactically correct and passed validation.') - # make the connection to the server con = login(server=server, user=user_mail, password=password) - if dump: con.start_logging() # read the prefixes of external ontologies that may be used - context = Context(data_model.get("prefixes") or {}) + context = Context(project_definition.get("prefixes") or {}) - # check if the project exists - project = None + # if project exists, update it, otherwise create it + project_local = Project(con=con, shortcode=project_definition["project"]["shortcode"]) try: - project = Project(con=con, shortcode=data_model["project"]["shortcode"]).read() + project_remote: Project = try_network_action( + action=lambda: project_local.read(), + failure_msg="" + ) + print(f"\tWARNING: Project '{project_remote.shortname}' ({project_remote.shortcode}) already exists on the DSP " + f"server. 
Updating it...") + overall_success = False + try: + project_remote = _update_project(project=project_remote, project_definition=project_definition, verbose=verbose) + except BaseError as err: + print(err.message) except BaseError: - pass + project_remote = _create_project(con=con, project_definition=project_definition) + print(f"\tCreated project '{project_remote.shortname}' ({project_remote.shortcode}).") + + # create the lists + ################## + list_root_nodes: dict[str, Any] = {} + if project_definition["project"].get("lists"): + print("Create lists...") + list_root_nodes, success = create_lists(server=server, user=user_mail, password=password, project_definition=project_definition) + if not success: + overall_success = False + + # create the groups + ################### + current_project_groups: dict[str, Group] = {} + if project_definition["project"].get("groups"): + print("Create groups...") + current_project_groups, success = _create_groups( + con=con, + groups=project_definition["project"]["groups"], + project=project_remote + ) + if not success: + overall_success = False + + # create or update the users + ############################ + if project_definition["project"].get("users"): + print("Create users...") + success = _create_users( + con=con, + users=project_definition["project"]["users"], + current_project_groups=current_project_groups, + current_project=project_remote, + verbose=verbose + ) + if not success: + overall_success = False - # if project exists, update it - if project: - print(f"Project '{data_model['project']['shortcode']}' already exists. 
Updating it...") - updated_project: Project = update_project(project=project, data_model=data_model, verbose=verbose) - if verbose: - updated_project.print() - - # if project does not exist, create it - else: - if verbose: - print("Create project...") - project = create_project(con=con, data_model=data_model, verbose=verbose) - - # create the list(s), skip if it already exists - list_root_nodes = {} - if data_model["project"].get("lists"): - if verbose: - print("Create lists...") - list_root_nodes = create_lists(input_file, lists_file, server, user_mail, password, verbose) - - # create the group(s), skip if it already exists - new_groups = {} - if data_model["project"].get("groups"): - if verbose: - print("Create groups...") - new_groups = create_groups(con=con, groups=data_model["project"]["groups"], project=project, verbose=verbose) + # create the ontologies + ####################### + print("Create ontologies...") + all_ontologies: list[Ontology] = try_network_action( + action=lambda: Ontology.getAllOntologies(con=con), + failure_msg="WARNING: Unable to retrieve remote ontologies. Cannot check if your ontology already exists." + ) + for ontology in project_definition.get("project").get("ontologies"): + if ontology["name"] in [onto.name for onto in all_ontologies]: + print(f"\tWARNING: Ontology '{ontology['name']}' already exists on the DSP server. Skipping...") + overall_success = False + continue - # create or update the user(s), skip if it already exists - if data_model["project"].get("users"): + print(f"Create ontology '{ontology['name']}'...") + ontology_local = Ontology( + con=con, + project=project_remote, + label=ontology["label"], + name=ontology["name"] + ) + ontology_remote: Ontology = try_network_action( + action=lambda: ontology_local.create(), + failure_msg=f"ERROR while trying to create ontology '{ontology['name']}'." 
+ ) + context.add_context(ontology_remote.name, ontology_remote.id + ('#' if not ontology_remote.id.endswith('#') else '')) + last_modification_date = ontology_remote.lastModificationDate if verbose: - print("Create users...") - create_users(con=con, users=data_model["project"]["users"], groups=new_groups, project=project, - verbose=verbose) - - # create the ontologies - if verbose: - print("Create ontologies...") - for ontology in data_model.get("project").get("ontologies"): - new_ontology = None - last_modification_date = None - ontology_name = ontology["name"] - try: - new_ontology = Ontology(con=con, - project=project, - label=ontology["label"], - name=ontology_name).create() - context.add_context(new_ontology.name, new_ontology.id + ('#' if not new_ontology.id.endswith('#') else '')) - last_modification_date = new_ontology.lastModificationDate - if verbose: - print(f"Created ontology '{ontology_name}'.") - except BaseError as err: - print( - f"ERROR while trying to create ontology '{ontology_name}'. The error message was: {err.message}") - exit(1) - except Exception as exception: - print(f"ERROR while trying to create ontology '{ontology_name}'. 
The error message was: {exception}") - exit(1) + print(f"\tCreated ontology '{ontology['name']}'.") # add the prefixes defined in the json file - for prefix, ontology_info in context: - if prefix not in new_ontology.context and ontology_info: - s = ontology_info.iri + ("#" if ontology_info.hashtag else "") - new_ontology.context.add_context(prefix, s) + for onto_prefix, onto_info in context: + if onto_info and onto_prefix not in ontology_remote.context: + onto_iri = onto_info.iri + ("#" if onto_info.hashtag else "") + ontology_remote.context.add_context(onto_prefix, onto_iri) # create the empty resource classes + print("\tCreate resource classes...") new_res_classes: dict[str, ResourceClass] = {} - sorted_resources = sort_resources(ontology["resources"], ontology["name"]) + sorted_resources = _sort_resources(ontology["resources"], ontology["name"]) for res_class in sorted_resources: - res_name = res_class.get("name") - super_classes = res_class.get("super") + super_classes = res_class["super"] if isinstance(super_classes, str): super_classes = [super_classes] - res_label = LangString(res_class.get("labels")) - res_comment = res_class.get("comments") - if res_comment: - res_comment = LangString(res_comment) - # if no cardinalities are submitted, don't create the class - if not res_class.get("cardinalities"): - print(f"ERROR while trying to add cardinalities to class '{res_name}'. No cardinalities submitted. 
At" - f"least one direct cardinality is required to create a class with dsp-tools.") - continue - - new_res_class: Optional[ResourceClass] = None + res_class_local = ResourceClass( + con=con, + context=ontology_remote.context, + ontology_id=ontology_remote.id, + name=res_class["name"], + superclasses=super_classes, + label=LangString(res_class.get("labels")), + comment=LangString(res_class.get("comments")) if res_class.get("comments") else None + ) try: - last_modification_date, new_res_class = ResourceClass(con=con, - context=new_ontology.context, - ontology_id=new_ontology.id, - name=res_name, - superclasses=super_classes, - label=res_label, - comment=res_comment).create( - last_modification_date) - except BaseError as err: - print( - f"ERROR while trying to create resource class '{res_name}'. The error message was: {err.message}") - except Exception as exception: - print( - f"ERROR while trying to create resource class '{res_name}'. The error message was: {exception}") - - if new_res_class: - if isinstance(new_res_class.id, str): - new_res_classes[new_res_class.id] = new_res_class - new_ontology.lastModificationDate = last_modification_date - + last_modification_date, res_class_remote = try_network_action( + action=lambda: res_class_local.create(last_modification_date=last_modification_date), + failure_msg=f"WARNING: Unable to create resource class '{res_class['name']}'." 
+ ) + res_class_remote = cast(ResourceClass, res_class_remote) + new_res_classes[res_class_remote.id] = res_class_remote + ontology_remote.lastModificationDate = last_modification_date if verbose: - print("Created resource class:") - new_res_class.print() + print(f"\tCreated resource class '{res_class['name']}'") + except BaseError as err: + print(err.message) + overall_success = False # create the property classes - sorted_prop_classes = sort_prop_classes(ontology["properties"], ontology["name"]) + print("\tCreate property classes...") + sorted_prop_classes = _sort_prop_classes(ontology["properties"], ontology["name"]) + new_prop_classes: dict[str, PropertyClass] = {} for prop_class in sorted_prop_classes: - prop_name = prop_class.get("name") - prop_label = LangString(prop_class.get("labels")) - # get the super-property/ies if defined, valid forms are: + # get the super-property/ies, valid forms are: # - "prefix:super-property" : fully qualified name of property in another ontology. The prefix has to be # defined in the prefixes part. + # - ":super-property" : super-property defined in current ontology # - "super-property" : super-property defined in the knora-api ontology # - if omitted, "knora-api:hasValue" is assumed - if prop_class.get("super"): super_props = [] for super_class in prop_class.get("super"): - if ':' in super_class: - super_props.append(super_class) + if ":" in super_class: + prefix, _class = super_class.split(":") + super_props.append(super_class if prefix else f"{ontology_remote.name}:{_class}") else: super_props.append(knora_api_prefix + super_class) else: super_props = ["knora-api:hasValue"] - # get the "object" if defined, valid forms are: + # get the "object", valid forms are: # - "prefix:object_name" : fully qualified object. The prefix has to be defined in the prefixes part. # - ":object_name" : The object is defined in the current ontology. 
# - "object_name" : The object is defined in "knora-api" - - if prop_class.get("object"): - tmp_group_name = prop_class.get("object").split(':') - if len(tmp_group_name) > 1: - if tmp_group_name[0]: - prop_object = prop_class.get("object") # fully qualified name - else: - prop_object = new_ontology.name + ':' + tmp_group_name[1] # object refers to actual ontology - else: - prop_object = knora_api_prefix + prop_class.get("object") # object refers to knora-api + if ":" in prop_class["object"]: + prefix, _object = prop_class["object"].split(':') + prop_object = f"{prefix}:{_object}" if prefix else f"{ontology_remote.name}:{_object}" else: - prop_object = None - prop_subject = prop_class.get("subject") - gui_element = prop_class.get("gui_element") + prop_object = knora_api_prefix + prop_class["object"] + gui_attributes = prop_class.get("gui_attributes") if gui_attributes and gui_attributes.get("hlist"): gui_attributes["hlist"] = "<" + list_root_nodes[gui_attributes["hlist"]]["id"] + ">" - prop_comment = prop_class.get("comments") - if prop_comment: - prop_comment = LangString(prop_comment) - new_prop_class = None + prop_class_local = PropertyClass( + con=con, + context=ontology_remote.context, + label=LangString(prop_class.get("labels")), + name=prop_class["name"], + ontology_id=ontology_remote.id, + superproperties=super_props, + object=prop_object, + subject=prop_class.get("subject"), + gui_element="salsah-gui:" + prop_class["gui_element"], + gui_attributes=gui_attributes, + comment=LangString(prop_class["comments"]) if prop_class.get("comments") else None + ) try: - last_modification_date, new_prop_class = PropertyClass(con=con, - context=new_ontology.context, - label=prop_label, - name=prop_name, - ontology_id=new_ontology.id, - superproperties=super_props, - object=prop_object, - subject=prop_subject, - gui_element="salsah-gui:" + gui_element, - gui_attributes=gui_attributes, - comment=prop_comment).create( - last_modification_date) - except BaseError as err: - 
print( - f"ERROR while trying to create property class '{prop_name}'. The error message was: {err.message}" + last_modification_date, prop_class_remote = try_network_action( + action=lambda: prop_class_local.create(last_modification_date=last_modification_date), + failure_msg=f"WARNING: Unable to create property class '{prop_class['name']}'." ) - except Exception as exception: - print( - f"ERROR while trying to create property class '{prop_name}'. The error message was: {exception}") - - if new_prop_class: - new_ontology.lastModificationDate = last_modification_date + prop_class_remote = cast(PropertyClass, prop_class_remote) + new_prop_classes[prop_class_remote.id] = prop_class_remote + ontology_remote.lastModificationDate = last_modification_date if verbose: - print("Created property:") - new_prop_class.print() + print(f"\tCreated property class '{prop_class['name']}'") + except BaseError as err: + print(err.message) + overall_success = False # Add cardinalities to class + print("\tAdd cardinalities to resource classes...") switcher = { "1": Cardinality.C_1, "0-1": Cardinality.C_0_1, @@ -613,37 +611,53 @@ def create_ontology(input_file: str, } for res_class in ontology.get("resources"): - if res_class.get("cardinalities"): - for card_info in res_class.get("cardinalities"): - rc = new_res_classes.get(new_ontology.id + "#" + res_class.get("name")) - cardinality = switcher[card_info.get("cardinality")] - prop_name_for_card = card_info.get("propname") - tmp_group_name = prop_name_for_card.split(":") - if len(tmp_group_name) > 1: - if tmp_group_name[0]: - prop_id = prop_name_for_card # fully qualified name - else: - prop_id = new_ontology.name + ":" + tmp_group_name[1] # prop name refers to actual ontology - else: - prop_id = knora_api_prefix + prop_name_for_card # prop name refers to knora-api - - if rc: - try: - last_modification_date = rc.addProperty( - property_id=prop_id, - cardinality=cardinality, - gui_order=card_info.get("gui_order"), - 
last_modification_date=last_modification_date) - if verbose: - print(f"{res_class['name']}: Added property '{prop_name_for_card}'") - - except BaseError as err: - print( - f"ERROR while trying to add cardinality '{prop_id}' to resource class {res_class.get('name')}." - f"The error message was: {err.message}") - except Exception as exception: - print( - f"ERROR while trying to add cardinality '{prop_id}' to resource class {res_class.get('name')}." - f"The error message was: {exception}") - - new_ontology.lastModificationDate = last_modification_date + res_class_remote = new_res_classes.get(ontology_remote.id + "#" + res_class["name"]) + if not res_class_remote: + print(f"WARNING: Unable to add cardinalities to resource class '{res_class['name']}': This class " + f"doesn't exist on the DSP server.") + overall_success = False + continue + for card_info in res_class.get("cardinalities"): + if ":" in card_info["propname"]: + prefix, prop = card_info["propname"].split(":") + qualified_propname = card_info["propname"] if prefix else f"{ontology_remote.name}:{prop}" + if not new_prop_classes.get(ontology_remote.id + "#" + prop): + print(f"WARNING: Unable to add cardinality '{card_info['propname']}' to resource class " + f"'{res_class['name']}': This property class doesn't exist on the DSP server.") + overall_success = False + continue + else: + qualified_propname = knora_api_prefix + card_info["propname"] + + try: + last_modification_date = try_network_action( + action=lambda: res_class_remote.addProperty( + property_id=qualified_propname, + cardinality=switcher[card_info["cardinality"]], + gui_order=card_info.get("gui_order"), + last_modification_date=last_modification_date + ), + failure_msg=f"WARNING: Unable to add cardinality '{qualified_propname}' to resource class " + f"{res_class['name']}." 
+ ) + if verbose: + print(f"\tAdded cardinality '{card_info['propname']}' to resource class '{res_class['name']}'") + except BaseError as err: + print(err.message) + overall_success = False + + ontology_remote.lastModificationDate = last_modification_date + + # final steps + ############# + if overall_success: + print("========================================================\n", + f"Successfully created project '{project_definition['project']['shortname']}' " + f"({project_definition['project']['shortcode']}) with all its ontologies. There were no problems during " + f"the creation process.") + else: + print("========================================================\n", + f"WARNING: The project '{project_definition['project']['shortname']}' ({project_definition['project']['shortcode']}) " + f"with its ontologies could be created, but during the creation process, some problems occurred. " + f"Please carefully check the console output.") + return overall_success diff --git a/knora/dsplib/utils/onto_validate.py b/knora/dsplib/utils/onto_validate.py index 5a397875e..fbcaf6e90 100644 --- a/knora/dsplib/utils/onto_validate.py +++ b/knora/dsplib/utils/onto_validate.py @@ -5,93 +5,98 @@ import json import jsonpath_ng, jsonpath_ng.ext import networkx as nx -from ..utils.expand_all_lists import expand_lists_from_excel +from .excel_to_json_lists import expand_lists_from_excel +from ..models.helpers import BaseError -def validate_ontology(input_file_or_json: Union[str, dict[Any, Any], 'os.PathLike[Any]']) -> bool: +def validate_project( + input_file_or_json: Union[dict[str, Any], os.PathLike[Any]], + expand_lists: bool = True +) -> bool: """ - Validates an ontology against the knora schema + Validates a JSON project definition file. First, the Excel file references in the "lists" section are expanded + (unless this behaviour is disabled). Then, the project is validated against the JSON schema. 
At last, a check is + performed if this project's ontologies contain properties derived from hasLinkTo that form a circular reference. If + so, these properties must have the cardinality 0-1 or 0-n, because during the xmlupload process, these values + are temporarily removed. Args: - input_file_or_json: the ontology to be validated, can either be a file or a json string (dict) + input_file_or_json: the project to be validated, can either be a file path or a parsed JSON file + expand_lists: if True, the Excel file references in the "lists" section will be expanded Returns: - True if ontology passed validation, False otherwise + True if the project passed validation. Otherwise, a BaseError with a detailed error report is raised. """ - data_model: dict[Any, Any] = {} if isinstance(input_file_or_json, dict): - data_model = input_file_or_json + project_definition = input_file_or_json elif os.path.isfile(input_file_or_json): with open(input_file_or_json) as f: - onto_json_str = f.read() - data_model = json.loads(onto_json_str) + project_json_str = f.read() + project_definition = json.loads(project_json_str) else: - print('Input is not valid.') - exit(1) - - # expand all lists referenced in the list section of the data model - new_lists = expand_lists_from_excel(data_model) + raise BaseError(f"Input '{input_file_or_json}' is neither a file path nor a JSON object.") - # add the newly created lists from Excel to the ontology - data_model['project']['lists'] = new_lists + if expand_lists: + # expand all lists referenced in the "lists" section of the project definition, and add them to the project + # definition + new_lists, _ = expand_lists_from_excel(project_definition["project"].get("lists")) + if new_lists: + project_definition['project']['lists'] = new_lists - # validate the data model against the schema + # validate the project definition against the schema current_dir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(current_dir, 
'../schemas/ontology.json')) as s: schema = json.load(s) try: - jsonschema.validate(instance=data_model, schema=schema) + jsonschema.validate(instance=project_definition, schema=schema) except jsonschema.exceptions.ValidationError as err: - print(f'Data model did not pass validation. The error message is: {err.message}\n' + raise BaseError(f'JSON project file did not pass validation. The error message is: {err.message}\n' f'The error occurred at {err.json_path}') - return False # cardinalities check for circular references - if check_cardinalities_of_circular_references(data_model): - print('Data model is syntactically correct and passed validation.') + if _check_cardinalities_of_circular_references(project_definition): return True - else: - return False -def check_cardinalities_of_circular_references(data_model: dict[Any, Any]) -> bool: +def _check_cardinalities_of_circular_references(project_definition: dict[Any, Any]) -> bool: """ - Check a data model if it contains properties derived from hasLinkTo that form a circular reference. If so, these - properties must have the cardinality 0-1 or 0-n, because during the xmlupload process, these values + Check a JSON project file if it contains properties derived from hasLinkTo that form a circular reference. If so, + these properties must have the cardinality 0-1 or 0-n, because during the xmlupload process, these values are temporarily removed. Args: - data_model: dictionary with a DSP project (as defined in a JSON ontology file) + project_definition: dictionary with a DSP project (as defined in a JSON project file) Returns: True if no circle was detected, or if all elements of all circles are of cardinality "0-1" or "0-n". False if there is a circle with at least one element that has a cardinality of "1" or "1-n". 
""" - link_properties = collect_link_properties(data_model) - errors = identify_problematic_cardinalities(data_model, link_properties) + link_properties = _collect_link_properties(project_definition) + errors = _identify_problematic_cardinalities(project_definition, link_properties) if len(errors) == 0: return True else: - print('ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references ' - 'between resources. This is not a problem in itself, but if you try to upload data that actually ' - 'contains circular references, these "hasLinkTo" properties will be temporarily removed from the ' - 'affected resources. Therefore, it is necessary that all involved "hasLinkTo" properties have a ' - 'cardinality of 0-1 or 0-n. \n' - 'Please make sure that the following properties have a cardinality of 0-1 or 0-n:') + error_message = \ + 'ERROR: Your ontology contains properties derived from "hasLinkTo" that allow circular references ' \ + 'between resources. This is not a problem in itself, but if you try to upload data that actually ' \ + 'contains circular references, these "hasLinkTo" properties will be temporarily removed from the ' \ + 'affected resources. Therefore, it is necessary that all involved "hasLinkTo" properties have a ' \ + 'cardinality of 0-1 or 0-n. 
\n' \ + 'Please make sure that the following properties have a cardinality of 0-1 or 0-n:' for error in errors: - print(f'\t- Resource {error[0]}, property {error[1]}') - return False + error_message = error_message + f'\n\t- Resource {error[0]}, property {error[1]}' + raise BaseError(error_message) -def collect_link_properties(data_model: dict[Any, Any]) -> dict[str, list[str]]: +def _collect_link_properties(project_definition: dict[Any, Any]) -> dict[str, list[str]]: """ map the properties derived from hasLinkTo to the resource classes they point to, for example: link_properties = {'rosetta:hasImage2D': ['rosetta:Image2D'], ...} """ - ontos = data_model['project']['ontologies'] + ontos = project_definition['project']['ontologies'] hasLinkTo_props = {'hasLinkTo', 'isPartOf', 'isRegionOf', 'isAnnotationOf'} link_properties: dict[str, list[str]] = dict() for index, onto in enumerate(ontos): @@ -101,7 +106,7 @@ def collect_link_properties(data_model: dict[Any, Any]) -> dict[str, list[str]]: for hasLinkTo_prop in hasLinkTo_props: hasLinkTo_matches.extend(jsonpath_ng.ext.parse( f'$.project.ontologies[{index}].properties[?super[*] == {hasLinkTo_prop}]' - ).find(data_model)) + ).find(project_definition)) # make the children from this iteration to the parents of the next iteration hasLinkTo_props = {x.value['name'] for x in hasLinkTo_matches} prop_obj_pair: dict[str, list[str]] = dict() @@ -127,7 +132,7 @@ def collect_link_properties(data_model: dict[Any, Any]) -> dict[str, list[str]]: return link_properties -def identify_problematic_cardinalities(data_model: dict[Any, Any], link_properties: dict[str, list[str]]) -> list[tuple[str, str]]: +def _identify_problematic_cardinalities(project_definition: dict[Any, Any], link_properties: dict[str, list[str]]) -> list[tuple[str, str]]: """ make an error list with all cardinalities that are part of a circle but have a cardinality of "1" or "1-n" """ @@ -136,7 +141,7 @@ def identify_problematic_cardinalities(data_model: dict[Any, 
Any], link_properti # cardinalities = {'rosetta:Text': {'rosetta:hasImage2D': '0-1', ...}} dependencies: dict[str, dict[str, list[str]]] = dict() cardinalities: dict[str, dict[str, str]] = dict() - for onto in data_model['project']['ontologies']: + for onto in project_definition['project']['ontologies']: for resource in onto['resources']: resname: str = onto['name'] + ':' + resource['name'] for card in resource['cardinalities']: diff --git a/knora/dsplib/utils/shared_methods.py b/knora/dsplib/utils/shared_methods.py new file mode 100644 index 000000000..9c69aa508 --- /dev/null +++ b/knora/dsplib/utils/shared_methods.py @@ -0,0 +1,84 @@ +import re +import time +from datetime import datetime +from typing import Union, Callable, Any, Optional + +from requests import RequestException + +from knora.dsplib.models.connection import Connection +from knora.dsplib.models.helpers import BaseError + + +def login(server: str, user: str, password: str) -> Connection: + """ + Logs in and returns the active connection. Raises a BaseError if the login fails. + + Args: + server: URL of the DSP server to connect to + user: Username (e-mail) + password: Password of the user + + Return: + Connection instance + """ + con = Connection(server) + try_network_action( + action=lambda: con.login(email=user, password=password), + failure_msg="ERROR: Cannot login to DSP server" + ) + return con + + +def try_network_action( + failure_msg: str, + action: Callable[..., Any] +) -> Any: + """ + Helper method that tries 7 times to execute an action. Each time, it catches ConnectionError and + requests.exceptions.RequestException, which lead to a waiting time and a retry. The waiting times are 1, + 2, 4, 8, 16, 32, 64 seconds. + + In case of a BaseError or Exception, a BaseError is raised with failure_msg, followed by the original + error message. + + If there is no success at the end, a BaseError with failure_msg is raised. 
+ + Args: + failure_msg: message of the raised BaseError if action cannot be executed + action: a lambda with the code to be executed + + Returns: + the return value of action + """ + + for i in range(7): + try: + return action() + except ConnectionError: + print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...') + time.sleep(2 ** i) + continue + except RequestException: + print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...') + time.sleep(2 ** i) + continue + except BaseError as err: + if re.search(r'try again later', err.message) or re.search(r'status code=5\d\d', err.message): + print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...') + time.sleep(2 ** i) + continue + if hasattr(err, 'message'): + err_message = err.message + else: + err_message = str(err).replace('\n', ' ') + err_message = err_message[:150] if len(err_message) > 150 else err_message + raise BaseError(f"{failure_msg} Error message: {err_message}") + except Exception as exc: + if hasattr(exc, 'message'): + exc_message = exc.message + else: + exc_message = str(exc).replace('\n', ' ') + exc_message = exc_message[:150] if len(exc_message) > 150 else exc_message + raise BaseError(f"{failure_msg} Error message: {exc_message}") + + raise BaseError(failure_msg) diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py index 633be9593..7b5135127 100644 --- a/knora/dsplib/utils/xml_upload.py +++ b/knora/dsplib/utils/xml_upload.py @@ -5,14 +5,12 @@ import json import os import re -import time import uuid from datetime import datetime from pathlib import Path -from typing import Optional, Union, cast, Tuple, Any, Callable +from typing import Optional, cast, Tuple, Any from urllib.parse import quote_plus from lxml import etree -from requests import RequestException from knora.dsplib.models.projectContext import ProjectContext from 
knora.dsplib.models.connection import Connection @@ -24,6 +22,7 @@ from knora.dsplib.models.xmlpermission import XmlPermission from knora.dsplib.models.xmlproperty import XMLProperty from knora.dsplib.models.xmlresource import XMLResource +from knora.dsplib.utils.shared_methods import try_network_action def _remove_circular_references(resources: list[XMLResource], verbose: bool) -> \ @@ -211,7 +210,7 @@ def _convert_ark_v0_to_resource_iri(ark: str) -> str: return "http://rdfh.ch/" + project_id + "/" + dsp_uuid -def parse_xml_file(input_file: str) -> etree.ElementTree: +def _parse_xml_file(input_file: str) -> etree.ElementTree: """ Parse an XML file with DSP-conform data, remove namespace URI from the elements' names, and transform the special tags , , and to their technically correct form , @@ -282,7 +281,7 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s proj_context = ProjectContext(con=con) sipi_server = Sipi(sipi, con.get_token()) - tree = parse_xml_file(input_file) + tree = _parse_xml_file(input_file) root = tree.getroot() default_ontology = root.attrib['default-ontology'] shortcode = root.attrib['shortcode'] @@ -392,13 +391,13 @@ def _upload_resources( # in case of a multimedia resource: upload the multimedia file resource_bitstream = None if resource.bitstream: - img: Optional[dict[Any, Any]] = _try_network_action( - object=sipi_server, - method='upload_bitstream', - kwargs={'filepath': os.path.join(imgdir, resource.bitstream.value)}, - terminal_output_on_failure=f'ERROR while trying to create resource "{resource.label}" ({resource.id}).' - ) - if not img: + try: + img: Optional[dict[Any, Any]] = try_network_action( + action=lambda: sipi_server.upload_bitstream(filepath=os.path.join(imgdir, resource.bitstream.value)), + failure_msg=f'ERROR while trying to create resource "{resource.label}" ({resource.id}).' 
+ ) + except BaseError as err: + print(err.message) failed_uploads.append(resource.id) continue internal_file_name_bitstream = img['uploadedFiles'][0]['internalFilename'] @@ -407,28 +406,30 @@ def _upload_resources( # create the resource in DSP resclass_type = resclass_name_2_type[resource.restype] properties = resource.get_propvals(id2iri_mapping, permissions_lookup) - resource_instance: ResourceInstance = _try_network_action( - method=resclass_type, - kwargs={ - 'con': con, - 'label': resource.label, - 'iri': resource_iri, - 'permissions': permissions_lookup.get(resource.permissions), - 'bitstream': resource_bitstream, - 'values': properties - }, - terminal_output_on_failure=f"ERROR while trying to create resource '{resource.label}' ({resource.id})." - ) - if not resource_instance: + try: + resource_instance: ResourceInstance = try_network_action( + action=lambda: resclass_type( + con=con, + label=resource.label, + iri=resource_iri, + permissions=permissions_lookup.get(resource.permissions), + bitstream=resource_bitstream, + values=properties + ), + failure_msg=f"ERROR while trying to create resource '{resource.label}' ({resource.id})." + ) + except BaseError as err: + print(err.message) failed_uploads.append(resource.id) continue - created_resource: ResourceInstance = _try_network_action( - object=resource_instance, - method='create', - terminal_output_on_failure=f"ERROR while trying to create resource '{resource.label}' ({resource.id})." - ) - if not created_resource: + try: + created_resource: ResourceInstance = try_network_action( + action=lambda: resource_instance.create(), + failure_msg=f"ERROR while trying to create resource '{resource.label}' ({resource.id})." 
+ ) + except BaseError as err: + print(err.message) failed_uploads.append(resource.id) continue id2iri_mapping[resource.id] = created_resource.iri @@ -462,13 +463,13 @@ def _upload_stashed_xml_texts( # resource could not be uploaded to DSP, so the stash cannot be uploaded either continue res_iri = id2iri_mapping[resource.id] - existing_resource = _try_network_action( - object=con, - method='get', - kwargs={'path': f'/v2/resources/{quote_plus(res_iri)}'}, - terminal_output_on_failure=f' ERROR while retrieving resource "{resource.id}" from DSP server' - ) - if not existing_resource: + try: + existing_resource = try_network_action( + action=lambda: con.get(path=f'/v2/resources/{quote_plus(res_iri)}'), + failure_msg=f' ERROR while retrieving resource "{resource.id}" from DSP server.' + ) + except BaseError as err: + print(err.message) continue print(f' Upload XML text(s) of resource "{resource.id}"...') for link_prop, hash_to_value in link_props.items(): @@ -510,14 +511,13 @@ def _upload_stashed_xml_texts( jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': '), cls=KnoraStandoffXmlEncoder) # execute API call - response = _try_network_action( - object=con, - method='put', - kwargs={'path': '/v2/values', 'jsondata': jsondata}, - terminal_output_on_failure=f' ERROR while uploading the xml text of "{link_prop.name}" ' - f'of resource "{resource.id}"' - ) - if not response: + try: + try_network_action( + action=lambda: con.put(path='/v2/values', jsondata=jsondata), + failure_msg=f' ERROR while uploading the xml text of "{link_prop.name}" of resource "{resource.id}"' + ) + except BaseError as err: + print(err.message) continue stashed_xml_texts[resource][link_prop].pop(pure_text) if verbose: @@ -566,13 +566,13 @@ def _upload_stashed_resptr_props( # resource could not be uploaded to DSP, so the stash cannot be uploaded either continue res_iri = id2iri_mapping[resource.id] - existing_resource = _try_network_action( - object=con, - method='get', - kwargs={'path': 
f'/v2/resources/{quote_plus(res_iri)}'}, - terminal_output_on_failure=f' ERROR while retrieving resource "{resource.id}" from DSP server' - ) - if not existing_resource: + try: + existing_resource = try_network_action( + action=lambda: con.get(path=f'/v2/resources/{quote_plus(res_iri)}'), + failure_msg=f' ERROR while retrieving resource "{resource.id}" from DSP server' + ) + except BaseError as err: + print(err.message) continue print(f' Upload resptrs of resource "{resource.id}"...') for link_prop, resptrs in prop_2_resptrs.items(): @@ -591,14 +591,13 @@ def _upload_stashed_resptr_props( '@context': existing_resource['@context'] } jsondata = json.dumps(jsonobj, indent=4, separators=(',', ': ')) - response = _try_network_action( - object=con, - method='post', - kwargs={'path': '/v2/values', 'jsondata': jsondata}, - terminal_output_on_failure=f'ERROR while uploading the resptr prop of "{link_prop.name}" ' - f'of resource "{resource.id}"' - ) - if not response: + try: + try_network_action( + action=lambda: con.post(path='/v2/values', jsondata=jsondata), + failure_msg=f'ERROR while uploading the resptr prop of "{link_prop.name}" of resource "{resource.id}"' + ) + except BaseError as err: + print(err.message) continue stashed_resptr_props[resource][link_prop].remove(resptr) if verbose: @@ -610,69 +609,6 @@ def _upload_stashed_resptr_props( return nonapplied_resptr_props -def _try_network_action( - terminal_output_on_failure: str, - method: Union[str, Callable[..., Any]], - object: Optional[Any] = None, - kwargs: Optional[dict[str, Any]] = None -) -> Any: - """ - Helper method that tries 7 times to execute an action. Each time, it catches ConnectionError and - requests.exceptions.RequestException, which lead to a waiting time and a retry. The waiting times are 1, - 2, 4, 8, 16, 32, 64 seconds. It also catches BaseError and Exception each time, which lead to a message being - printed and None being returned. 
- If there is still no success at the end, the message is printed and None is returned. - - Args: - terminal_output_on_failure: message to be printed if action cannot be executed - method: either a callable to be called on its own, or a method name (as string) to be called on object - object: if provided, it must be a python variable/object, accompanied by a method name (as string) - kwargs: if provided, a dict with the arguments passed to method - - Returns: - the return value of action, or None - """ - - for i in range(7): - try: - if object and isinstance(method, str): - if not kwargs: - return getattr(object, method)() - else: - return getattr(object, method)(**kwargs) - else: - if not kwargs: - return method() - else: - return method(**kwargs) - except ConnectionError: - print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...') - time.sleep(2 ** i) - continue - except RequestException: - print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...') - time.sleep(2 ** i) - continue - except BaseError as err: - if hasattr(err, 'message'): - err_message = err.message - else: - err_message = str(err).replace('\n', ' ') - err_message = err_message[:150] if len(err_message) > 150 else err_message - print(f"{terminal_output_on_failure} Error message: {err_message}") - return None - except Exception as exc: - if hasattr(exc, 'message'): - exc_message = exc.message - else: - exc_message = str(exc).replace('\n', ' ') - exc_message = exc_message[:150] if len(exc_message) > 150 else exc_message - print(f"{terminal_output_on_failure} Error message: {exc_message}") - return None - print(terminal_output_on_failure) - return None - - def _purge_stashed_resptr_props( stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] ) -> dict[XMLResource, dict[XMLProperty, list[str]]]: diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py index 210020670..d9da19851 100644 
--- a/test/e2e/test_tools.py +++ b/test/e2e/test_tools.py @@ -9,9 +9,8 @@ from knora.dsplib.utils.excel_to_json_properties import properties_excel2json from knora.dsplib.utils.excel_to_json_resources import resources_excel2json from knora.dsplib.utils.id_to_iri import id_to_iri -from knora.dsplib.utils.onto_create_ontology import create_ontology +from knora.dsplib.utils.onto_create_ontology import create_project from knora.dsplib.utils.onto_get import get_ontology -from knora.dsplib.utils.onto_validate import validate_ontology from knora.dsplib.utils.xml_upload import xml_upload @@ -21,8 +20,8 @@ class TestTools(unittest.TestCase): password = 'test' imgdir = '.' sipi = 'http://0.0.0.0:1024' - test_onto_file = 'testdata/test-onto.json' - test_list_file = 'testdata/test-list.json' + test_project_file = 'testdata/test-project-systematic.json' + test_project_minimal_file = 'testdata/test-project-minimal.json' test_data_file = 'testdata/test-data.xml' def setUp(self) -> None: @@ -37,29 +36,29 @@ def tearDown(self) -> None: excel_to_json_lists.cell_names = [] def test_get(self) -> None: - with open(self.test_onto_file) as f: - onto_json_str = f.read() - test_onto = json.loads(onto_json_str) + with open(self.test_project_file) as f: + project_json_str = f.read() + test_project = json.loads(project_json_str) get_ontology(project_identifier='tp', - outfile='testdata/tmp/_test-onto.json', + outfile='testdata/tmp/_test-project-systematic.json', server=self.server, user=self.user, password='test', verbose=True) - with open('testdata/tmp/_test-onto.json') as f: - onto_json_str = f.read() - test_onto_out = json.loads(onto_json_str) + with open('testdata/tmp/_test-project-systematic.json') as f: + project_json_str = f.read() + test_project_out = json.loads(project_json_str) - self.assertEqual(test_onto['project']['shortcode'], test_onto_out['project']['shortcode']) - self.assertEqual(test_onto['project']['shortname'], test_onto_out['project']['shortname']) - 
self.assertEqual(test_onto['project']['longname'], test_onto_out['project']['longname']) - self.assertEqual(test_onto['project']['descriptions'], test_onto_out['project']['descriptions']) - self.assertEqual(sorted(test_onto['project']['keywords']), sorted(test_onto_out['project']['keywords'])) + self.assertEqual(test_project['project']['shortcode'], test_project_out['project']['shortcode']) + self.assertEqual(test_project['project']['shortname'], test_project_out['project']['shortname']) + self.assertEqual(test_project['project']['longname'], test_project_out['project']['longname']) + self.assertEqual(test_project['project']['descriptions'], test_project_out['project']['descriptions']) + self.assertEqual(sorted(test_project['project']['keywords']), sorted(test_project_out['project']['keywords'])) - groups_expected = test_onto['project']['groups'] - groups_received = test_onto_out['project']['groups'] + groups_expected = test_project['project']['groups'] + groups_received = test_project_out['project']['groups'] group_names_expected = [] group_descriptions_expected = [] group_selfjoin_expected = [] @@ -68,23 +67,23 @@ def test_get(self) -> None: group_descriptions_received = [] group_selfjoin_received = [] group_status_received = [] - for group in groups_expected: + for group in sorted(groups_expected, key=lambda x: x["name"]): group_names_expected.append(group["name"]) group_descriptions_expected.append(group["descriptions"]["en"]) - group_selfjoin_expected.append(group["selfjoin"]) - group_status_expected.append(group["status"]) - for group in groups_received: + group_selfjoin_expected.append(group.get("selfjoin", False)) + group_status_expected.append(group.get("status", True)) + for group in sorted(groups_received, key=lambda x: x["name"]): groups_names_received.append(group["name"]) group_descriptions_received.append(group["descriptions"]["en"]) - group_selfjoin_received.append(group["selfjoin"]) - group_status_received.append(group["status"]) + 
group_selfjoin_received.append(group.get("selfjoin", False)) + group_status_received.append(group.get("status", True)) self.assertEqual(sorted(group_names_expected), sorted(groups_names_received)) self.assertEqual(sorted(group_descriptions_expected), sorted(group_descriptions_received)) self.assertEqual(group_selfjoin_expected, group_selfjoin_received) self.assertEqual(group_status_expected, group_status_received) - users_expected = test_onto['project']['users'] - users_received = test_onto_out['project']['users'] + users_expected = test_project['project']['users'] + users_received = test_project_out['project']['users'] user_username_expected = [] user_email_expected = [] user_given_name_expected = [] @@ -96,7 +95,8 @@ def test_get(self) -> None: user_family_name_received = [] user_lang_received = [] for user in users_expected: - if user["username"] == "testerKnownUser": # ignore testerKnownUser as he is not part of the project + if user["username"] in ["testerKnownUser", "testerSystemAdmin"]: + # ignore the ones who are not part of the project continue user_username_expected.append(user["username"]) user_email_expected.append(user["email"]) @@ -115,8 +115,8 @@ def test_get(self) -> None: self.assertEqual(sorted(user_family_name_expected), sorted(user_family_name_received)) self.assertEqual(sorted(user_lang_expected), sorted(user_lang_received)) - ontos_expected = test_onto['project']['ontologies'] - ontos_received = test_onto_out['project']['ontologies'] + ontos_expected = test_project['project']['ontologies'] + ontos_received = test_project_out['project']['ontologies'] onto_names_expected = [] onto_labels_expected = [] onto_names_received = [] @@ -130,12 +130,12 @@ def test_get(self) -> None: self.assertEqual(sorted(onto_names_expected), sorted(onto_names_received)) self.assertEqual(sorted(onto_labels_expected), sorted(onto_labels_received)) - lists = test_onto['project']['lists'] + lists = test_project['project']['lists'] test_list: dict[str, str] = next((l for 
l in lists if l['name'] == 'testlist'), {}) not_used_list: dict[str, str] = next((l for l in lists if l['name'] == 'notUsedList'), {}) excel_list: dict[str, str] = next((l for l in lists if l['name'] == 'my-list-from-excel'), {}) - lists_out = test_onto_out['project']['lists'] + lists_out = test_project_out['project']['lists'] test_list_out: dict[str, str] = next((l for l in lists_out if l['name'] == 'testlist'), {}) not_used_list_out: dict[str, str] = next((l for l in lists_out if l['name'] == 'notUsedList'), {}) excel_list_out: dict[str, str] = next((l for l in lists_out if l['name'] == 'my-list-from-excel'), {}) @@ -163,14 +163,25 @@ def test_excel_to_json_properties(self) -> None: properties_excel2json(excelfile='testdata/Properties.xlsx', outfile='testdata/tmp/_out_properties.json') - def test_create_ontology(self) -> None: - create_ontology(input_file=self.test_onto_file, - lists_file=self.test_list_file, - server=self.server, - user_mail=self.user, - password='test', - verbose=True, - dump=False) + def test_create_project(self) -> None: + result1 = create_project( + input_file=self.test_project_file, + server=self.server, + user_mail=self.user, + password='test', + verbose=True, + dump=False + ) + result2 = create_project( + input_file=self.test_project_minimal_file, + server=self.server, + user_mail=self.user, + password='test', + verbose=True, + dump=False + ) + self.assertTrue(result1) + self.assertTrue(result2) def test_xml_upload(self) -> None: result = xml_upload( diff --git a/test/unittests/test_create_ontology.py b/test/unittests/test_create_ontology.py index 5aeca6973..20937c0ed 100644 --- a/test/unittests/test_create_ontology.py +++ b/test/unittests/test_create_ontology.py @@ -4,15 +4,15 @@ from typing import Any import jsonpath_ng.ext -from knora.dsplib.utils.onto_create_ontology import sort_resources, sort_prop_classes -from knora.dsplib.utils.onto_validate import collect_link_properties, identify_problematic_cardinalities +from 
knora.dsplib.utils.onto_create_ontology import _sort_resources, _sort_prop_classes +from knora.dsplib.utils.onto_validate import _collect_link_properties, _identify_problematic_cardinalities class TestOntoCreation(unittest.TestCase): - with open('testdata/test-onto.json', 'r') as json_file: + with open('testdata/test-project-systematic.json', 'r') as json_file: project: dict[str, Any] = json.load(json_file) ontology: dict[str, Any] = project['project']['ontologies'][0] - with open('testdata/circular-onto.json', 'r') as json_file: + with open('testdata/test-project-circular-ontology.json', 'r') as json_file: circular_onto: dict[str, Any] = json.load(json_file) def test_sort_resources(self) -> None: @@ -23,7 +23,7 @@ def test_sort_resources(self) -> None: """ onto_name: str = self.ontology['name'] unsorted_resources: list[dict[str, Any]] = self.ontology['resources'] - sorted_resources = sort_resources(unsorted_resources, onto_name) + sorted_resources = _sort_resources(unsorted_resources, onto_name) unsorted_resources = sorted(unsorted_resources, key=lambda a: a['name']) sorted_resources = sorted(sorted_resources, key=lambda a: a['name']) @@ -39,7 +39,7 @@ def test_sort_prop_classes(self) -> None: """ onto_name: str = self.ontology['name'] unsorted_props: list[dict[str, Any]] = self.ontology['resources'] - sorted_props = sort_prop_classes(unsorted_props, onto_name) + sorted_props = _sort_prop_classes(unsorted_props, onto_name) unsorted_props = sorted(unsorted_props, key=lambda a: a['name']) sorted_props = sorted(sorted_props, key=lambda a: a['name']) @@ -48,8 +48,8 @@ def test_sort_prop_classes(self) -> None: def test_circular_references_in_onto(self) -> None: - link_properties = collect_link_properties(self.circular_onto) - errors = identify_problematic_cardinalities(self.circular_onto, link_properties) + link_properties = _collect_link_properties(self.circular_onto) + errors = _identify_problematic_cardinalities(self.circular_onto, link_properties) expected_errors = 
[ ('testonto:AnyResource', 'testonto:linkToTestThing1'), ('testonto:TestThing3', 'testonto:linkToResource') diff --git a/test/unittests/test_id_to_iri.py b/test/unittests/test_id_to_iri.py index 3eee43f07..7e506f78f 100644 --- a/test/unittests/test_id_to_iri.py +++ b/test/unittests/test_id_to_iri.py @@ -3,7 +3,7 @@ import unittest import os -from knora.dsplib.utils.xml_upload import parse_xml_file +from knora.dsplib.utils.xml_upload import _parse_xml_file from knora.dsplib.utils.id_to_iri import id_to_iri @@ -38,7 +38,7 @@ def test_replace_id_with_iri(self) -> None: out_file=self.out_file, verbose=True) - tree = parse_xml_file(self.out_file) + tree = _parse_xml_file(self.out_file) resource_elements = tree.xpath("/knora/resource/resptr-prop/resptr") result = [] diff --git a/test/unittests/test_xmlupload.py b/test/unittests/test_xmlupload.py index a6ee41a53..d0fb30235 100644 --- a/test/unittests/test_xmlupload.py +++ b/test/unittests/test_xmlupload.py @@ -4,7 +4,7 @@ from lxml import etree from knora.dsplib.models.helpers import BaseError -from knora.dsplib.utils.xml_upload import _convert_ark_v0_to_resource_iri, _remove_circular_references, parse_xml_file +from knora.dsplib.utils.xml_upload import _convert_ark_v0_to_resource_iri, _remove_circular_references, _parse_xml_file from knora.dsplib.models.xmlresource import XMLResource @@ -34,7 +34,7 @@ def test_convert_ark_v0_to_resource_iri(self) -> None: def test_remove_circular_references(self) -> None: # create a list of XMLResources from the test data file - tree = parse_xml_file('testdata/test-data.xml') + tree = _parse_xml_file('testdata/test-data.xml') resources = [XMLResource(x, 'testonto') for x in tree.getroot() if x.tag == "resource"] # get the purged resources and the stashes from the function to be tested diff --git a/testdata/circular-onto.json b/testdata/test-project-circular-ontology.json similarity index 100% rename from testdata/circular-onto.json rename to testdata/test-project-circular-ontology.json 
diff --git a/testdata/test-project-minimal.json b/testdata/test-project-minimal.json new file mode 100644 index 000000000..33fc0e8b5 --- /dev/null +++ b/testdata/test-project-minimal.json @@ -0,0 +1,48 @@ +{ + "$schema": "../knora/dsplib/schemas/ontology.json", + "project": { + "shortcode": "4124", + "shortname": "minimal-tp", + "longname": "minimal test project", + "descriptions": { + "en": "A minimal test project" + }, + "keywords": [ + "minimal" + ], + "ontologies": [ + { + "name": "minimalOnto", + "label": "minimal onto", + "properties": [ + { + "name": "hasText", + "super": [ + "hasValue" + ], + "object": "TextValue", + "labels": { + "en": "Text" + }, + "gui_element": "SimpleText" + } + ], + "resources": [ + { + "name": "minimalResource", + "super": "Resource", + "labels": { + "en": "Minimal Resource" + }, + "cardinalities": [ + { + "propname": ":hasText", + "cardinality": "0-n" + } + ] + } + ] + } + ] + } +} diff --git a/testdata/test-onto.json b/testdata/test-project-systematic.json similarity index 96% rename from testdata/test-onto.json rename to testdata/test-project-systematic.json index fb5264fab..69e0dafc8 100644 --- a/testdata/test-onto.json +++ b/testdata/test-project-systematic.json @@ -107,8 +107,7 @@ "de": "Testgruppe Editors", "rm": "squadra test 'Editors' in Rumantsch" }, - "selfjoin": false, - "status": true + "selfjoin": false }, { "name": "testgroupViewers", @@ -116,8 +115,16 @@ "en": "Test group viewers", "de": "Testgruppe Viewers" }, - "selfjoin": false, + "selfjoin": true, "status": true + }, + { + "name": "testgroupInactive", + "descriptions": { + "en": "Test group inactive", + "de": "Testgruppe Inaktiv" + }, + "status": false } ], "users": [ @@ -167,6 +174,33 @@ "projects": [ ":admin" ] + }, + { + "username": "testerSystemAdmin", + "email": "tester.systemadmin@test.org", + "givenName": "Tester", + "familyName": "System Admin", + "password": "test0815", + "lang": "en", + "groups": [ + "SystemAdmin" + ], + "status": true + }, + { + 
"username": "inactiveUser", + "email": "inactive.user@test.org", + "givenName": "Inactive", + "familyName": "User", + "password": "test0815", + "lang": "en", + "groups": [ + "SystemAdmin" + ], + "projects": [ + ":admin" + ], + "status": false } ], "ontologies": [