diff --git a/Makefile b/Makefile index 775f999dc..95705f5fd 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,6 @@ THIS_FILE := $(lastword $(MAKEFILE_LIST)) CURRENT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -include vars.mk - ################################# # Make targets for dsp-tools ################################# diff --git a/README.md b/README.md index 2577147da..6a8a4c6aa 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,6 @@ in `pyproject.toml` in the root directory of the project. ```toml [tool.autopep8] max_line_length = 180 -in-place = true experimental = true [tool.mypy] @@ -67,8 +66,11 @@ and `mypy --config-file pyproject.toml [file path]`. If you are using PyCharm we recommend installing autopep8 as external tool. You can then use it with right-click on the -file > `External Tools` > `autopep8` to reformat files in-place. mypy is available as -[plugin](https://plugins.jetbrains.com/plugin/11086-mypy). +file > `External Tools` > `autopep8` to reformat files in-place. Due to compatibility issues with VSCode, the argument +`--in-place=true` cannot be declared in `pyproject.toml` and needs to be passed to the external tool in the PyCharm settings. +mypy is available as [plugin](https://plugins.jetbrains.com/plugin/11086-mypy). + +In VSCode, both mypy and autopep8 can be set up as default linter and formatter through the Python extension. For formatting Markdown files (*.md) we use the default styling configuration provided by PyCharm. 
diff --git a/docs/assets/images/img-properties-example.png b/docs/assets/images/img-properties-example.png index 43bcc20de..5d73cbf9e 100644 Binary files a/docs/assets/images/img-properties-example.png and b/docs/assets/images/img-properties-example.png differ diff --git a/docs/assets/images/img-resources-example-1.png b/docs/assets/images/img-resources-example-1.png index cf0745fd5..0ef19c91c 100644 Binary files a/docs/assets/images/img-resources-example-1.png and b/docs/assets/images/img-resources-example-1.png differ diff --git a/docs/assets/templates/properties_template.xlsx b/docs/assets/templates/properties_template.xlsx new file mode 100644 index 000000000..609b377e8 Binary files /dev/null and b/docs/assets/templates/properties_template.xlsx differ diff --git a/docs/assets/templates/resources_template.xlsx b/docs/assets/templates/resources_template.xlsx new file mode 100644 index 000000000..fda7a1805 Binary files /dev/null and b/docs/assets/templates/resources_template.xlsx differ diff --git a/docs/dsp-tools-excel.md b/docs/dsp-tools-excel.md index 3d0efbf57..b51b7da57 100644 --- a/docs/dsp-tools-excel.md +++ b/docs/dsp-tools-excel.md @@ -11,6 +11,8 @@ list from an Excel file. With dsp-tools the `resources` section used in a data model (JSON) can be created from an Excel file. Only `XLSX` files are allowed. The `resources` section can be inserted into the ontology file and then be uploaded onto a DSP server. +**An Excel file template can be found [here](assets/templates/resources_template.xlsx). 
It is recommended to work from the template.** + The expected worksheets of the Excel file are: - `classes`: a table with all resource classes intended to be used in the resulting JSON @@ -26,6 +28,7 @@ The expected columns are: - `name` : The name of the resource - `super` : The base resource of the resource - `en`, `de`, `fr`, `it` : The labels of the resource in different languages, at least one language has to be provided +- `comment_en`, `comment_de`, `comment_fr`, `comment_it`: optional comments in the respective language All other worksheets, one for each resource class, have the following form: ![img-resources-example-2.png](assets/images/img-resources-example-2.png){ width=50% } @@ -43,6 +46,8 @@ With dsp-tools the `properties` section used in a data model (JSON) can be creat worksheet of the Excel file is considered and only XLSX files are allowed. The `properties` section can be inserted into the ontology file and then be uploaded onto a DSP server. +**An Excel file template can be found [here](assets/templates/properties_template.xlsx). 
It is recommended to work from the template.** + The Excel sheet must have the following format: ![img-properties-example.png](assets/images/img-properties-example.png) @@ -52,6 +57,7 @@ The expected columns are: - `super` : The base property of the property - `object` : The resource the property refers to if it is a link property (property derived from `hasLinkTo`) - `en`, `de`, `fr`, `it` : The labels of the property in different languages, at least one language has to be provided +- `comment_en`, `comment_de`, `comment_fr`, `comment_it`: optional comments in the respective language - `gui_element` : The GUI element for the property - `hlist` : In case of list values the according list diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py index be3a67dd8..2172cc2ed 100644 --- a/knora/dsplib/utils/excel_to_json_properties.py +++ b/knora/dsplib/utils/excel_to_json_properties.py @@ -1,5 +1,6 @@ import json import os +from typing import Any import jsonschema from openpyxl import load_workbook @@ -7,15 +8,15 @@ def validate_properties_with_schema(json_file: str) -> bool: """ - This function checks if the json properties are valid according to the schema. + This function checks if the json properties are valid according to the schema. 
- Args: - json_file: the json with the properties to be validated + Args: + json_file: the json with the properties to be validated - Returns: - True if the data passed validation, False otherwise + Returns: + True if the data passed validation, False otherwise - """ + """ current_dir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(current_dir, '../schemas/properties-only.json')) as schema: properties_schema = json.load(schema) @@ -29,21 +30,21 @@ def validate_properties_with_schema(json_file: str) -> bool: return True -def properties_excel2json(excelfile: str, outfile: str): +def properties_excel2json(excelfile: str, outfile: str) -> list[dict[str, Any]]: """ - Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology + Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology - Args: - excelfile: path to the Excel file containing the properties - outfile: path to the output JSON file containing the properties section for the ontology + Args: + excelfile: path to the Excel file containing the properties + outfile: path to the output JSON file containing the properties section for the ontology - Returns: - None + Returns: + List(JSON): a list with a dict (JSON) for each row in the Excel file """ # load file wb = load_workbook(filename=excelfile, read_only=True) sheet = wb.worksheets[0] - props = [row_to_prop(row) for row in sheet.iter_rows(min_row=2, values_only=True, max_col=9)] + props = [row_to_prop(row) for row in sheet.iter_rows(min_row=2, values_only=True, max_col=13)] prefix = '"properties":' @@ -59,7 +60,7 @@ def properties_excel2json(excelfile: str, outfile: str): return props -def row_to_prop(row): +def row_to_prop(row: tuple[str, str, str, str, str, str, str, str, str, str, str, str, str]) -> dict[str, Any]: """ Parses the row of an Excel sheet and makes a property from it @@ -69,7 +70,7 @@ def row_to_prop(row): 
Returns: prop (JSON): the property in JSON format """ - name, super_, object_, en, de, fr, it, gui_element, hlist = row + name, super_, object_, en, de, fr, it, comment_en, comment_de, comment_fr, comment_it, gui_element, hlist = row labels = {} if en: labels['en'] = en @@ -81,11 +82,21 @@ def row_to_prop(row): labels['it'] = it if not labels: raise Exception(f"No label given in any of the four languages: {name}") + comments = {} + if comment_en: + comments['en'] = comment_en + if comment_de: + comments['de'] = comment_de + if comment_fr: + comments['fr'] = comment_fr + if comment_it: + comments['it'] = comment_it prop = { 'name': name, 'super': [super_], 'object': object_, 'labels': labels, + 'comments': comments, 'gui_element': gui_element } if hlist: diff --git a/knora/dsplib/utils/excel_to_json_resources.py b/knora/dsplib/utils/excel_to_json_resources.py index bd0451a4e..d83e81342 100644 --- a/knora/dsplib/utils/excel_to_json_resources.py +++ b/knora/dsplib/utils/excel_to_json_resources.py @@ -1,21 +1,23 @@ import json import os +from typing import Any import jsonschema from openpyxl import load_workbook +from openpyxl.workbook.workbook import Workbook def validate_resources_with_schema(json_file: str) -> bool: """ - This function checks if the json resources are valid according to the schema. + This function checks if the json resources are valid according to the schema. 
- Args: - json_file: the json with the resources to be validated + Args: + json_file: the json with the resources to be validated - Returns: - True if the data passed validation, False otherwise + Returns: + True if the data passed validation, False otherwise - """ + """ current_dir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(current_dir, '../schemas/resources-only.json')) as schema: resources_schema = json.load(schema) @@ -29,17 +31,18 @@ def validate_resources_with_schema(json_file: str) -> bool: return True -def resources_excel2json(excelfile: str, outfile: str): +def resources_excel2json(excelfile: str, outfile: str) -> None: """ - Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology + Converts resources described in an Excel file into a resources section which can be integrated into a DSP ontology - Args: - excelfile: path to the Excel file containing the properties - outfile: path to the output JSON file containing the properties section for the ontology + Args: + excelfile: path to the Excel file containing the resources + outfile: path to the output JSON file containing the resources section for the ontology - Returns: - None + Returns: + None """ + # load file wb = load_workbook(excelfile, read_only=True) @@ -48,49 +51,7 @@ def resources_excel2json(excelfile: str, outfile: str): resource_list = [c for c in sheet.iter_rows(min_row=2, values_only=True)] prefix = '"resources":' - resources = [] - # for each resource in resources overview - for res in resource_list: - # get name - name = res[0] - # get labels - labels = {} - if res[1]: - labels['en'] = res[1] - if res[2]: - labels['de'] = res[2] - if res[3]: - labels['fr'] = res[3] - if res[4]: - labels['it'] = res[4] - # get super - sup = res[5] - - # load details for this resource - sh = wb[name] - property_list = [c for c in sh.iter_rows(min_row=2, values_only=True)] - - cards = [] - # for each of the detail sheets 
- for i, prop in enumerate(property_list): - # get name and cardinality. - # GUI-order is equal to order in the sheet. - property_ = { - "propname": ":" + prop[0], - "cardinality": str(prop[1]), - "gui_order": i + 1 - } - cards.append(property_) - - # build resource dict - resource = { - "name": name, - "labels": labels, - "super": sup, - "cardinalities": cards - } - # append to resources list - resources.append(resource) + resources = [_extract_row(res, wb) for res in resource_list] if validate_resources_with_schema(json.loads(json.dumps(resources, indent=4))): # write final list to JSON file if list passed validation @@ -100,3 +61,56 @@ def resources_excel2json(excelfile: str, outfile: str): print('Resource file was created successfully and written to file:', outfile) else: print('Resource data is not valid according to schema.') + + +def _extract_row(row: tuple[str, str, str, str, str, str, str, str, str, str], wb: Workbook) -> dict[str, Any]: + """build a resource dict from a row of the excel file""" + # get name + name = row[0] + # get labels + labels = {} + if row[1]: + labels['en'] = row[1] + if row[2]: + labels['de'] = row[2] + if row[3]: + labels['fr'] = row[3] + if row[4]: + labels['it'] = row[4] + # get comments + comments = {} + if row[5]: + comments['en'] = row[5] + if row[6]: + comments['de'] = row[6] + if row[7]: + comments['fr'] = row[7] + if row[8]: + comments['it'] = row[8] + # get super + sup = row[9] + + # load details for this resource + sh = wb[name] + property_list = [c for c in sh.iter_rows(min_row=2, values_only=True)] + + cards = [] + # for each of the detail sheets + for i, prop in enumerate(property_list): + # get name and cardinality. + # GUI-order is equal to order in the sheet. 
+ property_ = { + "propname": ":" + prop[0], + "cardinality": str(prop[1]), + "gui_order": i + 1 + } + cards.append(property_) + + # return resource dict + return { + "name": name, + "labels": labels, + "comments": comments, + "super": sup, + "cardinalities": cards + } diff --git a/pyproject.toml b/pyproject.toml index 87c49a993..a1bbccf13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [tool.autopep8] max_line_length = 180 -in-place = true experimental = true [tool.mypy] diff --git a/requirements.txt b/requirements.txt index e256b5f13..e9ce9a1c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ future==0.18.2 ghp-import==2.0.2 idna==3.3 importlib-metadata==4.8.1 +importlib-resources==5.4.0 isodate==0.6.0 Jinja2==3.0.2 joblib==1.1.0 @@ -53,6 +54,7 @@ six==1.16.0 tornado==6.1 tqdm==4.62.3 twine==3.5.0 +typing-extensions==4.0.1 urllib3==1.26.7 validators==0.18.2 watchdog==2.1.6 diff --git a/test/unittests/BUILD.bazel b/test/unittests/BUILD.bazel index b697da955..0853b3e54 100644 --- a/test/unittests/BUILD.bazel +++ b/test/unittests/BUILD.bazel @@ -1,30 +1,38 @@ -package(default_visibility = ["//visibility:public"]) - # make the python rules available -load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") +load("@rules_python//python:defs.bzl", "py_test") # make the dependencies from requirements.txt available load("@knora_py_deps//:requirements.bzl", "requirement") +package(default_visibility = ["//visibility:public"]) + py_test( name = "test_langstring", - srcs = ["test_langstring.py"] + srcs = ["test_langstring.py"], ) py_test( name = "test_value", srcs = ["test_value.py"], deps = [ - "//knora/dsplib/models:value", "//knora/dsplib/models:group", - "//knora/dsplib/models:helpers" - ] + "//knora/dsplib/models:helpers", + "//knora/dsplib/models:value", + ], ) py_test( name = "test_id_to_iri", srcs = ["test_id_to_iri.py"], data = [ - "//testdata:testdata" - ] + "//testdata", + ], +) + +py_test( + name = 
"test_excel_to_resource", + srcs = ["test_excel_to_resource.py"], + data = [ + "//testdata", + ], ) diff --git a/test/unittests/test_excel_to_resource.py b/test/unittests/test_excel_to_resource.py new file mode 100644 index 000000000..7ada18735 --- /dev/null +++ b/test/unittests/test_excel_to_resource.py @@ -0,0 +1,58 @@ +"""unit tests for excel to resource""" +import os +import unittest + +from openpyxl import Workbook + +from knora.dsplib.utils import excel_to_json_resources as e2j + + +class TestExcelToResource(unittest.TestCase): + + def test_excel2json(self) -> None: + in_file = "testdata/Resources.xlsx" + out_file = "out_res.json" + e2j.resources_excel2json(in_file, out_file) + self.assertTrue(os.path.exists(out_file)) + + def test_extract_row(self) -> None: + wb = Workbook() + ws_classes = wb.create_sheet("classes") + res_name = "ClassA" + row = ( + res_name, + "Class A", + "", + "", + "", + "A comment on Class A", + "", + "", + "", + "Resource", + ) + for i, c in enumerate(row): + ws_classes.cell(row=2, column=i+1, value=c) + ws_class_a = wb.create_sheet(res_name) + ws_class_a["A2"] = "property1" + ws_class_a["B2"] = "1" + resource_dict = e2j._extract_row(row, wb) + expected_dict = { + 'name': 'ClassA', + 'labels': { + 'en': 'Class A' + }, + 'comments': { + 'en': 'A comment on Class A' + }, + 'super': 'Resource', + 'cardinalities': [{ + 'propname': ':property1', + 'cardinality': '1', + 'gui_order': 1 + }]} + self.assertDictEqual(resource_dict, expected_dict) + + +if __name__ == '__main__': + unittest.main() diff --git a/testdata/Properties.xlsx b/testdata/Properties.xlsx index 0212d9d19..cccc21a0a 100644 Binary files a/testdata/Properties.xlsx and b/testdata/Properties.xlsx differ diff --git a/testdata/Resources.xlsx b/testdata/Resources.xlsx index 8a5c21be5..662e4cf55 100644 Binary files a/testdata/Resources.xlsx and b/testdata/Resources.xlsx differ diff --git a/vars.mk b/vars.mk deleted file mode 100644 index 8caa20677..000000000 --- a/vars.mk +++ 
/dev/null @@ -1,6 +0,0 @@ -UNAME := $(shell uname) -ifeq ($(UNAME),Darwin) - DOCKERHOST := $(shell ifconfig en0 | grep inet | grep -v inet6 | cut -d ' ' -f2) -else - DOCKERHOST := $(shell ip -4 addr show docker0 | grep -Po 'inet \K[\d.]+') -endif