dasch-swiss · jnussbaum · Sep 19, 2022 · Sep 16, 2022 · Sep 16, 2022 · Sep 16, 2022
diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py
@@ -6,9 +6,9 @@
 import sys
 from importlib.metadata import version
 
-from knora.dsplib.utils.excel_to_json_lists import list_excel2json, validate_lists_section_with_schema
-from knora.dsplib.utils.excel_to_json_properties import properties_excel2json
-from knora.dsplib.utils.excel_to_json_resources import resources_excel2json
+from knora.dsplib.utils.excel_to_json_lists import excel2lists, validate_lists_section_with_schema
+from knora.dsplib.utils.excel_to_json_properties import excel2properties
+from knora.dsplib.utils.excel_to_json_resources import excel2resources
 from knora.dsplib.utils.id_to_iri import id_to_iri
 from knora.dsplib.utils.onto_create_lists import create_lists
 from knora.dsplib.utils.onto_create_ontology import create_project
@@ -186,14 +186,14 @@ def program(user_args: list[str]) -> None:
                        verbose=args.verbose,
                        incremental=args.incremental)
     elif args.action == 'excel2lists':
-        list_excel2json(excelfolder=args.excelfolder,
-                        outfile=args.outfile)
+        excel2lists(excelfolder=args.excelfolder,
+                    outfile=args.outfile)
     elif args.action == 'excel2resources':
-        resources_excel2json(excelfile=args.excelfile,
-                             outfile=args.outfile)
+        excel2resources(excelfile=args.excelfile,
+                        outfile=args.outfile)
     elif args.action == 'excel2properties':
-        properties_excel2json(excelfile=args.excelfile,
-                              outfile=args.outfile)
+        excel2properties(excelfile=args.excelfile,
+                         outfile=args.outfile)
     elif args.action == 'id2iri':
         id_to_iri(xml_file=args.xmlfile,
                   json_file=args.jsonfile,

diff --git a/knora/dsplib/models/propertyelement.py b/knora/dsplib/models/propertyelement.py
@@ -0,0 +1,54 @@
+import dataclasses
+from typing import Union, Optional
+import pandas as pd
+import regex
+from knora.dsplib.models.helpers import BaseError
+
+
+@dataclasses.dataclass(frozen=True)
+class PropertyElement:
+    """
+    Bundles a property value with the attributes of the XML tag that the value will be written into.
+    Wherever a method accepts a plain string/int/float/bool as value, a PropertyElement can be passed instead,
+    in order to define more precisely which attributes the resulting tag (for example <text>) will have.
+
+    Args:
+        value: the content that will be written between the tags (for example <text></text>)
+        permissions: the permissions that the tag (for example <text>) will have
+        comment: the comment that the tag (for example <text>) will have
+        encoding: for <text> tags only. Can be "xml" or "utf8".
+
+    Examples:
+        Compare the result of passing a plain value with the result of passing a PropertyElement:
+
+        >>> make_text_prop(":testproperty", "first text")
+                <text-prop name=":testproperty">
+                    <text encoding="utf8" permissions="prop-default">
+                        first text
+                    </text>
+                </text-prop>
+        >>> make_text_prop(":testproperty", PropertyElement("first text", permissions="prop-restricted", encoding="xml"))
+                <text-prop name=":testproperty">
+                    <text encoding="xml" permissions="prop-restricted">
+                        first text
+                    </text>
+                </text-prop>
+    """
+    value: Union[str, int, float, bool]
+    permissions: str = "prop-default"
+    comment: Optional[str] = None
+    encoding: Optional[str] = None
+
+    def __post_init__(self) -> None:
+        if isinstance(self.value, str):
+            # a usable string contains a letter/digit/underscore and is not a mere placeholder for "empty"
+            has_content = regex.search(r"\p{L}|\d|_", self.value, flags=regex.UNICODE)
+            is_placeholder = regex.search(r"^(none|<NA>|-|n/a)$", self.value, flags=regex.IGNORECASE)
+            valid = bool(has_content) and not is_placeholder
+        else:
+            # bool is a subclass of int; np.nan is a float, hence the additional notna() check
+            valid = isinstance(self.value, int) or (isinstance(self.value, float) and bool(pd.notna(self.value)))
+        if not valid:
+            raise BaseError(f"'{self.value}' is not a valid value for a PropertyElement")
+        if self.encoding not in ("utf8", "xml", None):
+            raise BaseError(f"'{self.encoding}' is not a valid encoding for a PropertyElement")
diff --git a/knora/dsplib/schemas/properties-only.json b/knora/dsplib/schemas/properties-only.json
@@ -67,17 +67,17 @@
                     "oneOf": [
                         {
                             "enum": [
-                                "TextValue",
+                                "BooleanValue",
                                 "ColorValue",
                                 "DateValue",
                                 "DecimalValue",
                                 "GeonameValue",
                                 "IntValue",
-                                "BooleanValue",
-                                "TimeValue",
-                                "UriValue",
                                 "IntervalValue",
                                 "ListValue",
+                                "TextValue",
+                                "TimeValue",
+                                "UriValue",
                                 "Resource",
                                 "Representation"
                             ]
@@ -96,11 +96,11 @@
                 "gui_element": {
                     "type": "string",
                     "enum": [
+                        "Checkbox",
                         "Colorpicker",
                         "Date",
                         "Geonames",
                         "Interval",
-                        "TimeStamp",
                         "List",
                         "Radio",
                         "Richtext",
@@ -109,8 +109,7 @@
                         "Slider",
                         "Spinbox",
                         "Textarea",
-                        "Checkbox",
-                        "Fileupload"
+                        "TimeStamp"
                     ]
                 },
                 "gui_attributes": {

diff --git a/knora/dsplib/utils/excel_to_json_lists.py b/knora/dsplib/utils/excel_to_json_lists.py
@@ -3,7 +3,6 @@
 import json
 import os
 import re
-import unicodedata
 from typing import Any, Union, Optional, Tuple
 
 import jsonschema
@@ -13,6 +12,7 @@
 import regex
 
 from knora.dsplib.models.helpers import BaseError
+from knora.dsplib.utils.shared_methods import simplify_name
 
 list_of_lists_of_previous_cell_values: list[list[str]] = []
 """Module level variable used to ensure that there are no duplicate node names"""
@@ -236,30 +236,6 @@ def _make_json_lists_from_excel(excel_file_paths: list[str], verbose: bool = Fal
     return finished_lists
 
 
-def simplify_name(value: str) -> str:
-    """
-    Simplifies a given value in order to use it as node name
-
-    Args:
-        value: The value to be simplified
-
-    Returns:
-        str: The simplified value
-    """
-    simplified_value = str(value).lower()
-
-    # normalize characters (p.ex. ä becomes a)
-    simplified_value = unicodedata.normalize("NFKD", simplified_value)
-
-    # replace forward slash and whitespace with a dash
-    simplified_value = re.sub("[/\\s]+", "-", simplified_value)
-
-    # delete all characters which are not letters, numbers or dashes
-    simplified_value = re.sub("[^A-Za-z0-9\\-]+", "", simplified_value)
-
-    return simplified_value
-
-
 def validate_lists_section_with_schema(
     path_to_json_project_file: Optional[str] = None,
     lists_section: Optional[list[dict[str, Any]]] = None
@@ -273,7 +249,7 @@ def validate_lists_section_with_schema(
         lists_section: the "lists" section as Python object
 
     Returns:
-        True if the list passed validation. Otherwise, a BaseError with a detailed error report is raised
+        True if the "lists" section passed validation. Otherwise, a BaseError with a detailed error report is raised
     """
     if bool(path_to_json_project_file) == bool(lists_section):
         raise BaseError("Validation of the 'lists' section works only if exactly one of the two arguments is given.")
@@ -283,12 +259,15 @@ def validate_lists_section_with_schema(
     if path_to_json_project_file:
         with open(path_to_json_project_file) as f:
             project = json.load(f)
-            lists_section = project["project"]["lists"]
+            lists_section = project["project"].get("lists")
+            if not lists_section:
+                raise BaseError(f"Cannot validate \"lists\" section of {path_to_json_project_file}, because there is "
+                                f"no \"lists\" section in this file.")
 
     try:
         jsonschema.validate(instance={"lists": lists_section}, schema=lists_schema)
     except jsonschema.exceptions.ValidationError as err:
-        raise BaseError(f'"Lists" section did not pass validation. The error message is: {err.message}\n'
+        raise BaseError(f'"lists" section did not pass validation. The error message is: {err.message}\n'
                         f'The error occurred at {err.json_path}')
     return True
 
@@ -318,16 +297,16 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
     return excel_file_paths
 
 
-def list_excel2json(excelfolder: str, outfile: str) -> None:
+def excel2lists(excelfolder: str, outfile: str) -> list[dict[str, Any]]:
     """
-    This method writes a JSON file with a "lists" section that can later be inserted into a JSON project file.
+    Converts lists described in Excel files into a "lists" section that can be inserted into a JSON project file.
 
     Args:
         excelfolder: path to the folder containing the Excel file(s)
         outfile: path to the JSON file the output is written into
 
     Returns:
-        None
+        the "lists" section as Python list
     """
     excel_file_paths = _extract_excel_file_paths(excelfolder)
     print("The following Excel files will be processed:")
@@ -336,5 +315,7 @@ def list_excel2json(excelfolder: str, outfile: str) -> None:
     validate_lists_section_with_schema(lists_section=finished_lists)
 
     with open(outfile, "w", encoding="utf-8") as fp:
-        json.dump({"lists": finished_lists}, fp, indent=4, sort_keys=False, ensure_ascii=False)
-        print("List was created successfully and written to file:", outfile)
+        json.dump({"lists": finished_lists}, fp, indent=4, ensure_ascii=False)
+        print('"lists" section was created successfully and written to file:', outfile)
+
+    return finished_lists
diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py
@@ -1,37 +1,32 @@
 import json
-import os
 import re
 from typing import Any
-
 import jsonschema
 import pandas as pd
 
-from knora.dsplib.utils.excel_to_json_resources import prepare_dataframe
+from knora.dsplib.models.helpers import BaseError
+from knora.dsplib.utils.shared_methods import prepare_dataframe
 
 languages = ["en", "de", "fr", "it", "rm"]
 
 
-def _validate_properties_with_schema(json_file: str) -> bool:
+def _validate_properties_with_schema(properties_list: list[dict[str, Any]]) -> bool:
     """
-    This function checks if the json properties are valid according to the schema.
+    This function checks if the "properties" section of a JSON project file is valid according to the schema.
 
     Args:
-        json_file: the json with the properties to be validated
+        properties_list: the "properties" section of a JSON project as a list of dicts
 
     Returns:
-        True if the data passed validation, False otherwise
-
+        True if the "properties" section passed validation. Otherwise, a BaseError with a detailed error report is raised.
     """
-    current_dir = os.path.dirname(os.path.realpath(__file__))
-    with open(os.path.join(current_dir, "../schemas/properties-only.json")) as schema:
-        properties_schema = json.load(schema)
-
+    from pathlib import Path  # local import; the schema is located relative to this module, not relative to the CWD
+    properties_schema = json.loads((Path(__file__).resolve().parents[1] / "schemas/properties-only.json").read_text())
     try:
-        jsonschema.validate(instance=json_file, schema=properties_schema)
+        jsonschema.validate(instance=properties_list, schema=properties_schema)
     except jsonschema.exceptions.ValidationError as err:
-        print(err)
-        return False
-    print("Properties data passed schema validation.")
+        raise BaseError(f'"properties" section did not pass validation. The error message is: {err.message}\n'
+                        f'The error occurred at {err.json_path}')
     return True
 
 
@@ -42,19 +37,19 @@ def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]:
     Args:
         row: row from a pandas DataFrame that defines a property
         row_count: row number of Excel file
-        excelfile: name of the original excel file
+        excelfile: name of the original Excel file
 
     Returns:
         dict object of the property
     """
 
+    # extract the elements that are necessary to build the property
     name = row["name"]
     supers = [s.strip() for s in row["super"].split(",")]
     _object = row["object"]
     labels = {lang: row[lang] for lang in languages if row.get(lang)}
     comments = {lang: row[f"comment_{lang}"] for lang in languages if row.get(f"comment_{lang}")}
     gui_element = row["gui_element"]
-
     gui_attributes = dict()
     if row.get("hlist"):
         gui_attributes["hlist"] = row["hlist"]
@@ -71,12 +66,13 @@ def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]:
                 val = int(val)
             gui_attributes[attr] = val
 
-    # build the dict structure of this property and append it to the list of properties
+    # build the dict structure of this property
     _property = {
         "name": name,
         "super": supers,
         "object": _object,
-        "labels": labels}
+        "labels": labels
+    }
     if comments:
         _property["comments"] = comments
     _property["gui_element"] = gui_element
@@ -86,16 +82,17 @@ def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]:
     return _property
 
 
-def properties_excel2json(excelfile: str, outfile: str) -> None:
+def excel2properties(excelfile: str, outfile: str) -> list[dict[str, Any]]:
     """
-    Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
+    Converts properties described in an Excel file into a "properties" section which can be inserted into a JSON
+    project file.
 
     Args:
         excelfile: path to the Excel file containing the properties
-        outfile: path to the output JSON file containing the properties section for the ontology
+        outfile: path to the JSON file the output is written into
 
     Returns:
-        None
+        the "properties" section as Python list
     """
 
     # load file
@@ -109,10 +106,9 @@ def properties_excel2json(excelfile: str, outfile: str) -> None:
     props = [_row2prop(row, i, excelfile) for i, row in df.iterrows()]
 
     # write final list to JSON file if list passed validation
-    if _validate_properties_with_schema(json.loads(json.dumps(props, indent=4))):
-        with open(file=outfile, mode="w+", encoding="utf-8") as file:
-            file.write('"properties": ')
-            json.dump(props, file, indent=4)
-            print("Properties file was created successfully and written to file: ", outfile)
-    else:
-        print("Properties data is not valid according to schema.")
+    _validate_properties_with_schema(props)
+    with open(file=outfile, mode="w", encoding="utf-8") as file:
+        json.dump({"properties": props}, file, indent=4, ensure_ascii=False)
+        print('"properties" section was created successfully and written to file:', outfile)
+
+    return props