feat(excel-to-json): allow comments in class and property definitions (…

…#111) * Updating code to handle rdfs:comments in 4 languages in class and property definitions * Updating Excel examples to handle rdfs:comment in 4 languages * chore: add dependencies for python 3.10 compatibility * refactor: minor code changes * docs: update tooling documentation * docs: update documentation according to changes, add excel template files * chore: remove vars.mk * refactor: remove code smell * refactor: reduce code smell * test: add unit test for creating resources from excel
dasch-swiss · Jan 4, 2022 · 807959f · 807959f
1 parent 5604a5b
commit 807959f
Show file tree

Hide file tree

Showing 16 changed files with 185 additions and 93 deletions.
diff --git a/Makefile b/Makefile
@@ -3,8 +3,6 @@
 THIS_FILE := $(lastword $(MAKEFILE_LIST))
 CURRENT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 
-include vars.mk
-
 #################################
 # Make targets for dsp-tools
 #################################

diff --git a/README.md b/README.md
@@ -52,7 +52,6 @@ in `pyproject.toml` in the root directory of the project.
 ```toml
 [tool.autopep8]
 max_line_length = 180
-in-place = true
 experimental = true
 
 [tool.mypy]
@@ -67,8 +66,11 @@ and `mypy --config-file pyproject.toml
 [file path]`.
 
 If you are using PyCharm we recommend installing autopep8 as external tool. You can then use it with right-click on the
-file > `External Tools` > `autopep8` to reformat files in-place. mypy is available as
-[plugin](https://plugins.jetbrains.com/plugin/11086-mypy).
+file > `External Tools` > `autopep8` to reformat files in-place. Due to compatibility issues with VSCode, the argument 
+`--in-place=true` cannot be declared in `pyproject.toml` and needs to be passed to the external tool in the PyCharm settings.  
+mypy is available as a [plugin](https://plugins.jetbrains.com/plugin/11086-mypy).
+
+In VSCode, both mypy and autopep8 can be set up as the default linter and formatter through the Python extension.
 
 For formatting Markdown files (*.md) we use the default styling configuration provided by PyCharm.
 

diff --git a/docs/assets/images/img-properties-example.png b/docs/assets/images/img-properties-example.png
diff --git a/docs/assets/images/img-resources-example-1.png b/docs/assets/images/img-resources-example-1.png
diff --git a/docs/assets/templates/properties_template.xlsx b/docs/assets/templates/properties_template.xlsx
diff --git a/docs/assets/templates/resources_template.xlsx b/docs/assets/templates/resources_template.xlsx
diff --git a/docs/dsp-tools-excel.md b/docs/dsp-tools-excel.md
@@ -11,6 +11,8 @@ list from an Excel file.
 With dsp-tools the `resources` section used in a data model (JSON) can be created from an Excel file. Only `XLSX` files
 are allowed. The `resources` section can be inserted into the ontology file and then be uploaded onto a DSP server.
 
+**An Excel file template can be found [here](assets/templates/resources_template.xlsx). It is recommended to work from the template.**
+
 The expected worksheets of the Excel file are:
 
 - `classes`: a table with all resource classes intended to be used in the resulting JSON
@@ -26,6 +28,7 @@ The expected columns are:
 - `name` : The name of the resource
 - `super` : The base resource of the resource
 - `en`, `de`, `fr`, `it` : The labels of the resource in different languages, at least one language has to be provided
+- `comment_en`, `comment_de`, `comment_fr`, `comment_it`: optional comments in the respective language 
 
 All other worksheets, one for each resource class, have the following form:
 ![img-resources-example-2.png](assets/images/img-resources-example-2.png){ width=50% }
@@ -43,6 +46,8 @@ With dsp-tools the `properties` section used in a data model (JSON) can be creat
 worksheet of the Excel file is considered and only XLSX files are allowed. The `properties` section can be inserted into
 the ontology file and then be uploaded onto a DSP server.
 
+**An Excel file template can be found [here](assets/templates/properties_template.xlsx). It is recommended to work from the template.**
+
 The Excel sheet must have the following format:
 ![img-properties-example.png](assets/images/img-properties-example.png)
 
@@ -52,6 +57,7 @@ The expected columns are:
 - `super` : The base property of the property
 - `object` : The resource the property refers to if it is a link property (property derived from `hasLinkTo`)
 - `en`, `de`, `fr`, `it` : The labels of the property in different languages, at least one language has to be provided
+- `comment_en`, `comment_de`, `comment_fr`, `comment_it`: optional comments in the respective language 
 - `gui_element` : The GUI element for the property
 - `hlist` : In case of list values the according list
 

diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py
@@ -1,21 +1,22 @@
 import json
 import os
+from typing import Any
 
 import jsonschema
 from openpyxl import load_workbook
 
 
 def validate_properties_with_schema(json_file: str) -> bool:
     """
-        This function checks if the json properties are valid according to the schema.
+    This function checks if the json properties are valid according to the schema.
 
-        Args:
-            json_file: the json with the properties to be validated
+    Args:
+        json_file: the json with the properties to be validated
 
-        Returns:
-            True if the data passed validation, False otherwise
+    Returns:
+        True if the data passed validation, False otherwise
 
-        """
+    """
     current_dir = os.path.dirname(os.path.realpath(__file__))
     with open(os.path.join(current_dir, '../schemas/properties-only.json')) as schema:
         properties_schema = json.load(schema)
@@ -29,21 +30,21 @@ def validate_properties_with_schema(json_file: str) -> bool:
     return True
 
 
-def properties_excel2json(excelfile: str, outfile: str):
+def properties_excel2json(excelfile: str, outfile: str) -> list[dict[str, Any]]:
     """
-        Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
+    Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
 
-        Args:
-            excelfile: path to the Excel file containing the properties
-            outfile: path to the output JSON file containing the properties section for the ontology
+    Args:
+        excelfile: path to the Excel file containing the properties
+        outfile: path to the output JSON file containing the properties section for the ontology
 
-        Returns:
-            None
+    Returns:
+        List(JSON): a list with a dict (JSON) for each row in the Excel file
     """
     # load file
     wb = load_workbook(filename=excelfile, read_only=True)
     sheet = wb.worksheets[0]
-    props = [row_to_prop(row) for row in sheet.iter_rows(min_row=2, values_only=True, max_col=9)]
+    props = [row_to_prop(row) for row in sheet.iter_rows(min_row=2, values_only=True, max_col=13)]
 
     prefix = '"properties":'
 
@@ -59,7 +60,7 @@ def properties_excel2json(excelfile: str, outfile: str):
     return props
 
 
-def row_to_prop(row):
+def row_to_prop(row: tuple[str, str, str, str, str, str, str, str, str, str, str, str, str]) -> dict[str, Any]:
     """
     Parses the row of an Excel sheet and makes a property from it
 
@@ -69,7 +70,7 @@ def row_to_prop(row):
     Returns:
         prop (JSON): the property in JSON format
     """
-    name, super_, object_, en, de, fr, it, gui_element, hlist = row
+    name, super_, object_, en, de, fr, it, comment_en, comment_de, comment_fr, comment_it, gui_element, hlist = row
     labels = {}
     if en:
         labels['en'] = en
@@ -81,11 +82,21 @@ def row_to_prop(row):
         labels['it'] = it
     if not labels:
         raise Exception(f"No label given in any of the four languages: {name}")
+    comments = {}
+    if comment_en:
+        comments['en'] = comment_en
+    if comment_de:
+        comments['de'] = comment_de
+    if comment_fr:
+        comments['fr'] = comment_fr
+    if comment_it:
+        comments['it'] = comment_it
     prop = {
         'name': name,
         'super': [super_],
         'object': object_,
         'labels': labels,
+        'comments': comments,
         'gui_element': gui_element
     }
     if hlist:

diff --git a/knora/dsplib/utils/excel_to_json_resources.py b/knora/dsplib/utils/excel_to_json_resources.py
@@ -1,21 +1,23 @@
 import json
 import os
+from typing import Any
 
 import jsonschema
 from openpyxl import load_workbook
+from openpyxl.workbook.workbook import Workbook
 
 
 def validate_resources_with_schema(json_file: str) -> bool:
     """
-        This function checks if the json resources are valid according to the schema.
+    This function checks if the json resources are valid according to the schema.
 
-        Args:
-            json_file: the json with the resources to be validated
+    Args:
+        json_file: the json with the resources to be validated
 
-        Returns:
-            True if the data passed validation, False otherwise
+    Returns:
+        True if the data passed validation, False otherwise
 
-        """
+    """
     current_dir = os.path.dirname(os.path.realpath(__file__))
     with open(os.path.join(current_dir, '../schemas/resources-only.json')) as schema:
         resources_schema = json.load(schema)
@@ -29,17 +31,18 @@ def validate_resources_with_schema(json_file: str) -> bool:
     return True
 
 
-def resources_excel2json(excelfile: str, outfile: str):
+def resources_excel2json(excelfile: str, outfile: str) -> None:
     """
-        Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
+    Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
 
-        Args:
-            excelfile: path to the Excel file containing the properties
-            outfile: path to the output JSON file containing the properties section for the ontology
+    Args:
+        excelfile: path to the Excel file containing the properties
+        outfile: path to the output JSON file containing the properties section for the ontology
 
-        Returns:
-            None
+    Returns:
+        None
     """
+
     # load file
     wb = load_workbook(excelfile, read_only=True)
 
@@ -48,49 +51,7 @@ def resources_excel2json(excelfile: str, outfile: str):
     resource_list = [c for c in sheet.iter_rows(min_row=2, values_only=True)]
 
     prefix = '"resources":'
-    resources = []
-    # for each resource in resources overview
-    for res in resource_list:
-        # get name
-        name = res[0]
-        # get labels
-        labels = {}
-        if res[1]:
-            labels['en'] = res[1]
-        if res[2]:
-            labels['de'] = res[2]
-        if res[3]:
-            labels['fr'] = res[3]
-        if res[4]:
-            labels['it'] = res[4]
-        # get super
-        sup = res[5]
-
-        # load details for this resource
-        sh = wb[name]
-        property_list = [c for c in sh.iter_rows(min_row=2, values_only=True)]
-
-        cards = []
-        # for each of the detail sheets
-        for i, prop in enumerate(property_list):
-            # get name and cardinality.
-            # GUI-order is equal to order in the sheet.
-            property_ = {
-                "propname": ":" + prop[0],
-                "cardinality": str(prop[1]),
-                "gui_order": i + 1
-            }
-            cards.append(property_)
-
-        # build resource dict
-        resource = {
-            "name": name,
-            "labels": labels,
-            "super": sup,
-            "cardinalities": cards
-        }
-        # append to resources list
-        resources.append(resource)
+    resources = [_extract_row(res, wb) for res in resource_list]
 
     if validate_resources_with_schema(json.loads(json.dumps(resources, indent=4))):
         # write final list to JSON file if list passed validation
@@ -100,3 +61,56 @@ def resources_excel2json(excelfile: str, outfile: str):
             print('Resource file was created successfully and written to file:', outfile)
     else:
         print('Resource data is not valid according to schema.')
+
+
+def _extract_row(row: tuple[str, str, str, str, str, str, str, str, str, str], wb: Workbook) -> dict[str, Any]:
+    """build a property dict from a row of the excel file"""
+    # get name
+    name = row[0]
+    # get labels
+    labels = {}
+    if row[1]:
+        labels['en'] = row[1]
+    if row[2]:
+        labels['de'] = row[2]
+    if row[3]:
+        labels['fr'] = row[3]
+    if row[4]:
+        labels['it'] = row[4]
+    # get comments
+    comments = {}
+    if row[5]:
+        comments['en'] = row[5]
+    if row[6]:
+        comments['de'] = row[6]
+    if row[7]:
+        comments['fr'] = row[7]
+    if row[8]:
+        comments['it'] = row[8]
+    # get super
+    sup = row[9]
+
+    # load details for this resource
+    sh = wb[name]
+    property_list = [c for c in sh.iter_rows(min_row=2, values_only=True)]
+
+    cards = []
+    # for each of the detail sheets
+    for i, prop in enumerate(property_list):
+        # get name and cardinality.
+        # GUI-order is equal to order in the sheet.
+        property_ = {
+            "propname": ":" + prop[0],
+            "cardinality": str(prop[1]),
+            "gui_order": i + 1
+        }
+        cards.append(property_)
+
+    # return resource dict
+    return {
+        "name": name,
+        "labels": labels,
+        "comments": comments,
+        "super": sup,
+        "cardinalities": cards
+    }
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,5 @@
 [tool.autopep8]
 max_line_length = 180
-in-place = true
 experimental = true
 
 [tool.mypy]

diff --git a/requirements.txt b/requirements.txt
@@ -11,6 +11,7 @@ future==0.18.2
 ghp-import==2.0.2
 idna==3.3
 importlib-metadata==4.8.1
+importlib-resources==5.4.0
 isodate==0.6.0
 Jinja2==3.0.2
 joblib==1.1.0
@@ -53,6 +54,7 @@ six==1.16.0
 tornado==6.1
 tqdm==4.62.3
 twine==3.5.0
+typing-extensions==4.0.1
 urllib3==1.26.7
 validators==0.18.2
 watchdog==2.1.6

diff --git a/test/unittests/BUILD.bazel b/test/unittests/BUILD.bazel
@@ -1,30 +1,38 @@
-package(default_visibility = ["//visibility:public"])
-
 # make the python rules available
-load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")
+load("@rules_python//python:defs.bzl", "py_test")
 
 # make the dependencies from requirements.txt available
 load("@knora_py_deps//:requirements.bzl", "requirement")
 
+package(default_visibility = ["//visibility:public"])
+
 py_test(
     name = "test_langstring",
-    srcs = ["test_langstring.py"]
+    srcs = ["test_langstring.py"],
 )
 
 py_test(
     name = "test_value",
     srcs = ["test_value.py"],
     deps = [
-        "//knora/dsplib/models:value",
         "//knora/dsplib/models:group",
-        "//knora/dsplib/models:helpers"
-    ]
+        "//knora/dsplib/models:helpers",
+        "//knora/dsplib/models:value",
+    ],
 )
 
 py_test(
     name = "test_id_to_iri",
     srcs = ["test_id_to_iri.py"],
     data = [
-            "//testdata:testdata"
-        ]
+        "//testdata",
+    ],
+)
+
+py_test(
+    name = "test_excel_to_resource",
+    srcs = ["test_excel_to_resource.py"],
+    data = [
+        "//testdata",
+    ],
 )