From d2c2e085813680007a5228c1ef5bab82b4f1a6c7 Mon Sep 17 00:00:00 2001
From: Johannes Nussbaum <39048939+jnussbaum@users.noreply.github.com>
Date: Mon, 19 Sep 2022 17:22:52 +0200
Subject: [PATCH] chore: tidy up excel2lists, excel2resources, excel2properties
 (DEV-1352) (#229)

---
 docs/dsp-tools-excel2xml.md                   |   3 +-
 knora/dsp_tools.py                            |  20 +--
 knora/dsplib/models/propertyelement.py        |  54 +++++++++
 knora/dsplib/schemas/properties-only.json     |  13 +-
 knora/dsplib/utils/excel_to_json_lists.py     |  56 ++++-----
 .../dsplib/utils/excel_to_json_properties.py  |  61 +++++-----
 knora/dsplib/utils/excel_to_json_resources.py |  89 +++++---------
 knora/dsplib/utils/onto_create_lists.py       |   2 +-
 knora/dsplib/utils/onto_create_ontology.py    |   2 +-
 .../utils/{shared_methods.py => shared.py}    | 101 +++++++++++++++-
 knora/dsplib/utils/xml_upload.py              |   6 +-
 knora/excel2xml.py                            |  77 +-----------
 test/e2e/test_tools.py                        |  16 +--
 test/unittests/test_excel2xml.py              |  12 --
 test/unittests/test_excel_to_json_lists.py    |  97 +++++++++++++--
 ...es.py => test_excel_to_json_properties.py} |  30 ++---
 ...rce.py => test_excel_to_json_resources.py} |  53 +++-----
 test/unittests/test_shared_methods.py         |  44 +++++++
 testdata/{single_list => list_single}/de.xlsx | Bin
 testdata/{single_list => list_single}/en.xlsx | Bin
 .../de.xlsx                                   | Bin
 .../en.xlsx                                   | Bin
 .../fr.xlsx                                   | Bin
 .../de.xlsx                                   | Bin
 .../en.xlsx                                   | Bin
 .../fr.xlsx                                   | Bin
 .../de.xlsx                                   | Bin
 .../de.xlsx                                   | Bin
 .../en.xlsx                                   | Bin
 .../fr.xlsx                                   | Bin
 .../lists_multilingual_output_expected.json   | 102 ++++++++++++++++
 testdata/lists_section_expanded.json          | 114 ++++++++++++++++++
 testdata/test-project-systematic.json         |   2 +-
 33 files changed, 640 insertions(+), 314 deletions(-)
 create mode 100644 knora/dsplib/models/propertyelement.py
 rename knora/dsplib/utils/{shared_methods.py => shared.py} (50%)
 rename test/unittests/{test_excel_to_properties.py => test_excel_to_json_properties.py} (87%)
 rename test/unittests/{test_excel_to_resource.py => test_excel_to_json_resources.py} (67%)
 create mode 100644 test/unittests/test_shared_methods.py
 rename testdata/{single_list => list_single}/de.xlsx (100%)
 rename testdata/{single_list => list_single}/en.xlsx (100%)
 rename testdata/{invalid_lists_1 => lists_invalid_1}/de.xlsx (100%)
 rename testdata/{invalid_lists_1 => lists_invalid_1}/en.xlsx (100%)
 rename testdata/{invalid_lists_1 => lists_invalid_1}/fr.xlsx (100%)
 rename testdata/{invalid_lists_2 => lists_invalid_2}/de.xlsx (100%)
 rename testdata/{invalid_lists_2 => lists_invalid_2}/en.xlsx (100%)
 rename testdata/{invalid_lists_2 => lists_invalid_2}/fr.xlsx (100%)
 rename testdata/{monolingual_lists => lists_monolingual}/de.xlsx (100%)
 rename testdata/{multilingual_lists => lists_multilingual}/de.xlsx (100%)
 rename testdata/{multilingual_lists => lists_multilingual}/en.xlsx (100%)
 rename testdata/{multilingual_lists => lists_multilingual}/fr.xlsx (100%)
 create mode 100644 testdata/lists_multilingual_output_expected.json
 create mode 100644 testdata/lists_section_expanded.json

diff --git a/docs/dsp-tools-excel2xml.md b/docs/dsp-tools-excel2xml.md
index 8f86e73db..5cc21d200 100644
--- a/docs/dsp-tools-excel2xml.md
+++ b/docs/dsp-tools-excel2xml.md
@@ -63,7 +63,8 @@ For `make_boolean_prop(cell)`, the following formats are supported:
 #### Check if a cell contains a usable value
 The method `check_notna(cell)` checks a value if it is usable in the context of data archiving. A value is considered 
 usable if it is
- - a number (integer or float, but not np.nan)
+
+ - a number (integer or float, but not numpy.nan)
  - a boolean
  - a string with at least one Unicode letter, underscore, or number, but not "None", "<NA>", "N/A", or "-"
  - a PropertyElement whose "value" fulfills the above criteria
diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py
index 0dc7c5b8b..6f0f9034b 100644
--- a/knora/dsp_tools.py
+++ b/knora/dsp_tools.py
@@ -6,16 +6,16 @@
 import sys
 from importlib.metadata import version
 
-from knora.dsplib.utils.excel_to_json_lists import list_excel2json, validate_lists_section_with_schema
-from knora.dsplib.utils.excel_to_json_properties import properties_excel2json
-from knora.dsplib.utils.excel_to_json_resources import resources_excel2json
+from knora.dsplib.utils.excel_to_json_lists import excel2lists, validate_lists_section_with_schema
+from knora.dsplib.utils.excel_to_json_properties import excel2properties
+from knora.dsplib.utils.excel_to_json_resources import excel2resources
 from knora.dsplib.utils.id_to_iri import id_to_iri
 from knora.dsplib.utils.onto_create_lists import create_lists
 from knora.dsplib.utils.onto_create_ontology import create_project
 from knora.dsplib.utils.onto_get import get_ontology
 from knora.dsplib.utils.onto_validate import validate_project
 from knora.dsplib.utils.xml_upload import xml_upload
-from knora.dsplib.utils.shared_methods import validate_xml_against_schema
+from knora.dsplib.utils.shared import validate_xml_against_schema
 from knora.excel2xml import excel2xml
 
 
@@ -186,14 +186,14 @@ def program(user_args: list[str]) -> None:
                        verbose=args.verbose,
                        incremental=args.incremental)
     elif args.action == 'excel2lists':
-        list_excel2json(excelfolder=args.excelfolder,
-                        outfile=args.outfile)
+        excel2lists(excelfolder=args.excelfolder,
+                    outfile=args.outfile)
     elif args.action == 'excel2resources':
-        resources_excel2json(excelfile=args.excelfile,
-                             outfile=args.outfile)
+        excel2resources(excelfile=args.excelfile,
+                        outfile=args.outfile)
     elif args.action == 'excel2properties':
-        properties_excel2json(excelfile=args.excelfile,
-                              outfile=args.outfile)
+        excel2properties(excelfile=args.excelfile,
+                         outfile=args.outfile)
     elif args.action == 'id2iri':
         id_to_iri(xml_file=args.xmlfile,
                   json_file=args.jsonfile,
diff --git a/knora/dsplib/models/propertyelement.py b/knora/dsplib/models/propertyelement.py
new file mode 100644
index 000000000..eb3291a42
--- /dev/null
+++ b/knora/dsplib/models/propertyelement.py
@@ -0,0 +1,54 @@
+from typing import Union, Optional
+import pandas as pd
+import regex
+from dataclasses import dataclass
+from knora.dsplib.models.helpers import BaseError
+
+
+@dataclass(frozen=True)
+class PropertyElement:
+    """
+    A PropertyElement object carries more information about a property value than the value itself.
+    The "value" is the value that could be passed to a method as plain string/int/float/bool. Use a PropertyElement
+    instead to define more precisely what attributes your <text> tag (for example) will have.
+
+    Args:
+        value: This is the content that will be written between the <text></text> tags (for example)
+        permissions: These are the permissions that your <text> tag (for example) will have
+        comment: This is the comment that your <text> tag (for example) will have
+        encoding: For <text> tags only. Can be "xml" or "utf8".
+
+    Examples:
+        See the difference between the first and the second example:
+
+        >>> make_text_prop(":testproperty", "first text")
+                <text-prop name=":testproperty">
+                    <text encoding="utf8" permissions="prop-default">
+                        first text
+                    </text>
+                </text-prop>
+        >>> make_text_prop(":testproperty", PropertyElement("first text", permissions="prop-restricted", encoding="xml"))
+                <text-prop name=":testproperty">
+                    <text encoding="xml" permissions="prop-restricted">
+                        first text
+                    </text>
+                </text-prop>
+    """
+    value: Union[str, int, float, bool]
+    permissions: str = "prop-default"
+    comment: Optional[str] = None
+    encoding: Optional[str] = None
+
+    def __post_init__(self) -> None:
+        if not any([
+            isinstance(self.value, int),
+            isinstance(self.value, float) and pd.notna(self.value),  # necessary because isinstance(np.nan, float)
+            isinstance(self.value, bool),
+            isinstance(self.value, str) and all([
+                regex.search(r"\p{L}|\d|_", self.value, flags=regex.UNICODE),
+                not bool(regex.search(r"^(none|<NA>|-|n/a)$", self.value, flags=regex.IGNORECASE))
+            ])
+        ]):
+            raise BaseError(f"'{self.value}' is not a valid value for a PropertyElement")
+        if self.encoding not in ["utf8", "xml", None]:
+            raise BaseError(f"'{self.encoding}' is not a valid encoding for a PropertyElement")
diff --git a/knora/dsplib/schemas/properties-only.json b/knora/dsplib/schemas/properties-only.json
index 6720d6758..dc16fe9a0 100644
--- a/knora/dsplib/schemas/properties-only.json
+++ b/knora/dsplib/schemas/properties-only.json
@@ -67,17 +67,17 @@
                     "oneOf": [
                         {
                             "enum": [
-                                "TextValue",
+                                "BooleanValue",
                                 "ColorValue",
                                 "DateValue",
                                 "DecimalValue",
                                 "GeonameValue",
                                 "IntValue",
-                                "BooleanValue",
-                                "TimeValue",
-                                "UriValue",
                                 "IntervalValue",
                                 "ListValue",
+                                "TextValue",
+                                "TimeValue",
+                                "UriValue",
                                 "Resource",
                                 "Representation"
                             ]
@@ -96,11 +96,11 @@
                 "gui_element": {
                     "type": "string",
                     "enum": [
+                        "Checkbox",
                         "Colorpicker",
                         "Date",
                         "Geonames",
                         "Interval",
-                        "TimeStamp",
                         "List",
                         "Radio",
                         "Richtext",
@@ -109,8 +109,7 @@
                         "Slider",
                         "Spinbox",
                         "Textarea",
-                        "Checkbox",
-                        "Fileupload"
+                        "TimeStamp"
                     ]
                 },
                 "gui_attributes": {
diff --git a/knora/dsplib/utils/excel_to_json_lists.py b/knora/dsplib/utils/excel_to_json_lists.py
index 998731624..f3b84ff00 100644
--- a/knora/dsplib/utils/excel_to_json_lists.py
+++ b/knora/dsplib/utils/excel_to_json_lists.py
@@ -3,7 +3,6 @@
 import json
 import os
 import re
-import unicodedata
 from typing import Any, Union, Optional, Tuple
 
 import jsonschema
@@ -13,6 +12,7 @@
 import regex
 
 from knora.dsplib.models.helpers import BaseError
+from knora.dsplib.utils.shared import simplify_name
 
 list_of_lists_of_previous_cell_values: list[list[str]] = []
 """Module level variable used to ensure that there are no duplicate node names"""
@@ -236,30 +236,6 @@ def _make_json_lists_from_excel(excel_file_paths: list[str], verbose: bool = Fal
     return finished_lists
 
 
-def simplify_name(value: str) -> str:
-    """
-    Simplifies a given value in order to use it as node name
-
-    Args:
-        value: The value to be simplified
-
-    Returns:
-        str: The simplified value
-    """
-    simplified_value = str(value).lower()
-
-    # normalize characters (p.ex. ä becomes a)
-    simplified_value = unicodedata.normalize("NFKD", simplified_value)
-
-    # replace forward slash and whitespace with a dash
-    simplified_value = re.sub("[/\\s]+", "-", simplified_value)
-
-    # delete all characters which are not letters, numbers or dashes
-    simplified_value = re.sub("[^A-Za-z0-9\\-]+", "", simplified_value)
-
-    return simplified_value
-
-
 def validate_lists_section_with_schema(
     path_to_json_project_file: Optional[str] = None,
     lists_section: Optional[list[dict[str, Any]]] = None
@@ -273,7 +249,7 @@ def validate_lists_section_with_schema(
         lists_section: the "lists" section as Python object
 
     Returns:
-        True if the list passed validation. Otherwise, a BaseError with a detailed error report is raised
+        True if the "lists" section passed validation. Otherwise, a BaseError with a detailed error report is raised
     """
     if bool(path_to_json_project_file) == bool(lists_section):
         raise BaseError("Validation of the 'lists' section works only if exactly one of the two arguments is given.")
@@ -283,12 +259,15 @@ def validate_lists_section_with_schema(
     if path_to_json_project_file:
         with open(path_to_json_project_file) as f:
             project = json.load(f)
-            lists_section = project["project"]["lists"]
+            lists_section = project["project"].get("lists")
+            if not lists_section:
+                raise BaseError(f"Cannot validate \"lists\" section of {path_to_json_project_file}, because there is "
+                                f"no \"lists\" section in this file.")
 
     try:
         jsonschema.validate(instance={"lists": lists_section}, schema=lists_schema)
     except jsonschema.exceptions.ValidationError as err:
-        raise BaseError(f'"Lists" section did not pass validation. The error message is: {err.message}\n'
+        raise BaseError(f'"lists" section did not pass validation. The error message is: {err.message}\n'
                         f'The error occurred at {err.json_path}')
     return True
 
@@ -318,23 +297,30 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
     return excel_file_paths
 
 
-def list_excel2json(excelfolder: str, outfile: str) -> None:
+def excel2lists(excelfolder: str, path_to_output_file: Optional[str] = None) -> list[dict[str, Any]]:
     """
-    This method writes a JSON file with a "lists" section that can later be inserted into a JSON project file.
+    Converts lists described in Excel files into a "lists" section that can be inserted into a JSON project file.
 
     Args:
         excelfolder: path to the folder containing the Excel file(s)
-        outfile: path to the JSON file the output is written into
+        path_to_output_file: if provided, the output is written into this JSON file
 
     Returns:
-        None
+        the "lists" section as Python list
     """
+    # read the data
     excel_file_paths = _extract_excel_file_paths(excelfolder)
     print("The following Excel files will be processed:")
     [print(f" - {filename}") for filename in excel_file_paths]
+    
+    # construct the "lists" section
     finished_lists = _make_json_lists_from_excel(excel_file_paths, verbose=True)
     validate_lists_section_with_schema(lists_section=finished_lists)
 
-    with open(outfile, "w", encoding="utf-8") as fp:
-        json.dump({"lists": finished_lists}, fp, indent=4, sort_keys=False, ensure_ascii=False)
-        print("List was created successfully and written to file:", outfile)
+    # write final "lists" section
+    if path_to_output_file:
+        with open(path_to_output_file, "w", encoding="utf-8") as fp:
+            json.dump(finished_lists, fp, indent=4, ensure_ascii=False)
+            print('"lists" section was created successfully and written to file:', path_to_output_file)
+
+    return finished_lists
diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py
index 822ba9584..78b800457 100644
--- a/knora/dsplib/utils/excel_to_json_properties.py
+++ b/knora/dsplib/utils/excel_to_json_properties.py
@@ -1,37 +1,32 @@
 import json
-import os
 import re
-from typing import Any
-
+from typing import Any, Optional
 import jsonschema
 import pandas as pd
 
-from knora.dsplib.utils.excel_to_json_resources import prepare_dataframe
+from knora.dsplib.models.helpers import BaseError
+from knora.dsplib.utils.shared import prepare_dataframe
 
 languages = ["en", "de", "fr", "it", "rm"]
 
 
-def _validate_properties_with_schema(json_file: str) -> bool:
+def _validate_properties_with_schema(properties_list: list[dict[str, Any]]) -> bool:
     """
-    This function checks if the json properties are valid according to the schema.
+    This function checks if the "properties" section of a JSON project file is valid according to the schema.
 
     Args:
-        json_file: the json with the properties to be validated
+        properties_list: the "properties" section of a JSON project as a list of dicts
 
     Returns:
-        True if the data passed validation, False otherwise
-
+        True if the "properties" section passed validation. Otherwise, a BaseError with a detailed error report is raised.
     """
-    current_dir = os.path.dirname(os.path.realpath(__file__))
-    with open(os.path.join(current_dir, "../schemas/properties-only.json")) as schema:
+    with open("knora/dsplib/schemas/properties-only.json") as schema:
         properties_schema = json.load(schema)
-
     try:
-        jsonschema.validate(instance=json_file, schema=properties_schema)
+        jsonschema.validate(instance=properties_list, schema=properties_schema)
     except jsonschema.exceptions.ValidationError as err:
-        print(err)
-        return False
-    print("Properties data passed schema validation.")
+        raise BaseError(f'"properties" section did not pass validation. The error message is: {err.message}\n'
+                        f'The error occurred at {err.json_path}')
     return True
 
 
@@ -42,19 +37,19 @@ def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]:
     Args:
         row: row from a pandas DataFrame that defines a property
         row_count: row number of Excel file
-        excelfile: name of the original excel file
+        excelfile: name of the original Excel file
 
     Returns:
         dict object of the property
     """
 
+    # extract the elements that are necessary to build the property
     name = row["name"]
     supers = [s.strip() for s in row["super"].split(",")]
     _object = row["object"]
     labels = {lang: row[lang] for lang in languages if row.get(lang)}
     comments = {lang: row[f"comment_{lang}"] for lang in languages if row.get(f"comment_{lang}")}
     gui_element = row["gui_element"]
-
     gui_attributes = dict()
     if row.get("hlist"):
         gui_attributes["hlist"] = row["hlist"]
@@ -71,12 +66,13 @@ def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]:
                 val = int(val)
             gui_attributes[attr] = val
 
-    # build the dict structure of this property and append it to the list of properties
+    # build the dict structure of this property
     _property = {
         "name": name,
         "super": supers,
         "object": _object,
-        "labels": labels}
+        "labels": labels
+    }
     if comments:
         _property["comments"] = comments
     _property["gui_element"] = gui_element
@@ -86,16 +82,17 @@ def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]:
     return _property
 
 
-def properties_excel2json(excelfile: str, outfile: str) -> None:
+def excel2properties(excelfile: str, path_to_output_file: Optional[str] = None) -> list[dict[str, Any]]:
     """
-    Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
+    Converts properties described in an Excel file into a "properties" section which can be inserted into a JSON
+    project file.
 
     Args:
         excelfile: path to the Excel file containing the properties
-        outfile: path to the output JSON file containing the properties section for the ontology
+        path_to_output_file: if provided, the output is written into this JSON file
 
     Returns:
-        None
+        the "properties" section as Python list
     """
     
     # load file
@@ -107,12 +104,12 @@ def properties_excel2json(excelfile: str, outfile: str) -> None:
 
     # transform every row into a property
     props = [_row2prop(row, i, excelfile) for i, row in df.iterrows()]
+    _validate_properties_with_schema(props)
+
+    # write final "properties" section into a JSON file
+    if path_to_output_file:
+        with open(file=path_to_output_file, mode="w", encoding="utf-8") as file:
+            json.dump(props, file, indent=4, ensure_ascii=False)
+            print('"properties" section was created successfully and written to file:', path_to_output_file)
 
-    # write final list to JSON file if list passed validation
-    if _validate_properties_with_schema(json.loads(json.dumps(props, indent=4))):
-        with open(file=outfile, mode="w+", encoding="utf-8") as file:
-            file.write('"properties": ')
-            json.dump(props, file, indent=4)
-            print("Properties file was created successfully and written to file: ", outfile)
-    else:
-        print("Properties data is not valid according to schema.")
+    return props
diff --git a/knora/dsplib/utils/excel_to_json_resources.py b/knora/dsplib/utils/excel_to_json_resources.py
index c5d5641e5..af9615e03 100644
--- a/knora/dsplib/utils/excel_to_json_resources.py
+++ b/knora/dsplib/utils/excel_to_json_resources.py
@@ -1,70 +1,33 @@
 import json
-import os
-import re
-from typing import Any
-
+from typing import Any, Optional
 import jsonschema
 import pandas as pd
+from knora.dsplib.models.helpers import BaseError
+from knora.dsplib.utils.shared import prepare_dataframe
 
 languages = ["en", "de", "fr", "it", "rm"]
 
 
-def _validate_resources_with_schema(json_file: str) -> bool:
+def _validate_resources_with_schema(resources_list: list[dict[str, Any]]) -> bool:
     """
-    This function checks if the json resources are valid according to the schema.
+    This function checks if the "resources" section of a JSON project file is valid according to the schema.
 
     Args:
-        json_file: the json with the resources to be validated
+        resources_list: the "resources" section of a JSON project as a list of dicts
 
     Returns:
-        True if the data passed validation, False otherwise
+        True if the "resources" section passed validation. Otherwise, a BaseError with a detailed error report is raised.
     """
-    current_dir = os.path.dirname(os.path.realpath(__file__))
-    with open(os.path.join(current_dir, "../schemas/resources-only.json")) as schema:
+    with open("knora/dsplib/schemas/resources-only.json") as schema:
         resources_schema = json.load(schema)
-
     try:
-        jsonschema.validate(instance=json_file, schema=resources_schema)
+        jsonschema.validate(instance=resources_list, schema=resources_schema)
     except jsonschema.exceptions.ValidationError as err:
-        print(err)
-        return False
-    print("Resource data passed schema validation.")
+        raise BaseError(f'"resources" section did not pass validation. The error message is: {err.message}\n'
+                        f'The error occurred at {err.json_path}')
     return True
 
 
-def prepare_dataframe(df: pd.DataFrame, required_columns: list[str], location_of_sheet: str) -> pd.DataFrame:
-    """
-    Takes a pandas DataFrame, strips the column headers from whitespaces and transforms them to lowercase,
-    strips every cell from whitespaces and inserts "" if there is no string in it, and deletes the rows that don't have
-    a value in one of the required cells.
-
-    Args:
-        df: pandas DataFrame
-        required_columns: headers of the columns where a value is required
-        location_of_sheet: for better error messages, provide this information of the caller
-
-    Returns:
-        prepared DataFrame
-    """
-
-    any_char_regex = r"[\wäàçëéèêïöôòüÄÀÇËÉÊÏÖÔÒÜ]"
-
-    # strip column headers and transform to lowercase, so that the script doesn't break when the headers vary a bit
-    new_df = df.rename(columns=lambda x: x.strip().lower())
-    required_columns = [x.strip().lower() for x in required_columns]
-    # strip every cell, and insert "" if there is no valid word in it
-    new_df = new_df.applymap(lambda x: str(x).strip() if pd.notna(x) and re.search(any_char_regex, str(x), flags=re.IGNORECASE) else "")
-    # delete rows that don't have the required columns
-    for req in required_columns:
-        if req not in new_df:
-            raise ValueError(f"{location_of_sheet} requires a column named '{req}'")
-        new_df = new_df[pd.notna(new_df[req])]
-        new_df = new_df[[bool(re.search(any_char_regex, x, flags=re.IGNORECASE)) for x in new_df[req]]]
-    if len(new_df) < 1:
-        raise ValueError(f"{location_of_sheet} requires at least one row")
-    return new_df
-
-
 def _row2resource(row: pd.Series, excelfile: str) -> dict[str, Any]:
     """
     Method that takes a row from a pandas DataFrame, reads its content, and returns a dict object of the resource
@@ -112,16 +75,17 @@ def _row2resource(row: pd.Series, excelfile: str) -> dict[str, Any]:
     return resource
 
 
-def resources_excel2json(excelfile: str, outfile: str) -> None:
+def excel2resources(excelfile: str, path_to_output_file: Optional[str] = None) -> list[dict[str, Any]]:
     """
-    Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
+    Converts resources described in an Excel file into a "resources" section which can be inserted into a JSON
+    project file.
 
     Args:
-        excelfile: path to the Excel file containing the properties
-        outfile: path to the output JSON file containing the properties section for the ontology
+        excelfile: path to the Excel file containing the resources
+        path_to_output_file: if provided, the output is written into this JSON file
 
     Returns:
-        None
+        the "resources" section as Python list
     """
 
     # load file
@@ -129,16 +93,17 @@ def resources_excel2json(excelfile: str, outfile: str) -> None:
     all_classes_df = prepare_dataframe(
         df=all_classes_df,
         required_columns=["name", "super"],
-        location_of_sheet=f"Sheet 'classes' in file '{excelfile}'")
+        location_of_sheet=f"Sheet 'classes' in file '{excelfile}'"
+    )
 
     # transform every row into a resource
     resources = [_row2resource(row, excelfile) for i, row in all_classes_df.iterrows()]
+    _validate_resources_with_schema(resources)
+
+    # write final "resources" section into a JSON file
+    if path_to_output_file:
+        with open(file=path_to_output_file, mode="w", encoding="utf-8") as file:
+            json.dump(resources, file, indent=4, ensure_ascii=False)
+            print('"resources" section was created successfully and written to file:', path_to_output_file)
 
-    # write final list of all resources to JSON file, if list passed validation
-    if _validate_resources_with_schema(json.loads(json.dumps(resources, indent=4))):
-        with open(file=outfile, mode="w+", encoding="utf-8") as file:
-            file.write('"resources": ')
-            json.dump(resources, file, indent=4)
-            print("Resource file was created successfully and written to file ", outfile)
-    else:
-        print("Resource data is not valid according to schema.")
+    return resources
diff --git a/knora/dsplib/utils/onto_create_lists.py b/knora/dsplib/utils/onto_create_lists.py
index 9a3340510..faff0fe71 100644
--- a/knora/dsplib/utils/onto_create_lists.py
+++ b/knora/dsplib/utils/onto_create_lists.py
@@ -7,7 +7,7 @@
 from ..models.helpers import BaseError
 from ..models.listnode import ListNode
 from ..models.project import Project
-from .shared_methods import login, try_network_action
+from .shared import login, try_network_action
 
 
 def _create_list_node(
diff --git a/knora/dsplib/utils/onto_create_ontology.py b/knora/dsplib/utils/onto_create_ontology.py
index ec3bc3b19..1f3224dde 100644
--- a/knora/dsplib/utils/onto_create_ontology.py
+++ b/knora/dsplib/utils/onto_create_ontology.py
@@ -16,7 +16,7 @@
 from knora.dsplib.utils.excel_to_json_lists import expand_lists_from_excel
 from knora.dsplib.utils.onto_create_lists import create_lists
 from knora.dsplib.utils.onto_validate import validate_project
-from knora.dsplib.utils.shared_methods import login, try_network_action
+from knora.dsplib.utils.shared import login, try_network_action
 
 
 def _create_project(con: Connection, project_definition: dict[str, Any]) -> Project:
diff --git a/knora/dsplib/utils/shared_methods.py b/knora/dsplib/utils/shared.py
similarity index 50%
rename from knora/dsplib/utils/shared_methods.py
rename to knora/dsplib/utils/shared.py
index 4c34468f1..6ade52982 100644
--- a/knora/dsplib/utils/shared_methods.py
+++ b/knora/dsplib/utils/shared.py
@@ -1,13 +1,15 @@
-import re
 import time
-from datetime import datetime
-from typing import Union, Callable, Any, Optional
-
+import unicodedata
+import pandas as pd
+import regex
 from lxml import etree
 from requests import RequestException
+from datetime import datetime
+from typing import Callable, Any, Optional
 
 from knora.dsplib.models.connection import Connection
 from knora.dsplib.models.helpers import BaseError
+from knora.dsplib.models.propertyelement import PropertyElement
 
 
 def login(server: str, user: str, password: str) -> Connection:
@@ -64,7 +66,7 @@ def try_network_action(
             time.sleep(2 ** i)
             continue
         except BaseError as err:
-            if re.search(r'try again later', err.message) or re.search(r'status code=5\d\d', err.message):
+            if regex.search(r'try again later', err.message) or regex.search(r'status code=5\d\d', err.message):
                 print(f'{datetime.now().isoformat()}: Try reconnecting to DSP server, next attempt in {2 ** i} seconds...')
                 time.sleep(2 ** i)
                 continue
@@ -107,3 +109,92 @@ def validate_xml_against_schema(input_file: str, schema_file: str) -> bool:
         for error in xmlschema.error_log:
             error_msg = error_msg + f"\n  Line {error.line}: {error.message}"
         raise BaseError(error_msg)
+
+
+def prepare_dataframe(df: pd.DataFrame, required_columns: list[str], location_of_sheet: str) -> pd.DataFrame:
+    """
+    Takes a pandas DataFrame, strips the column headers of whitespace and transforms them to lowercase, strips every
+    cell of whitespace and inserts "" if there is no word character in it, and deletes the rows that don't have a
+    value in one of the required columns. Headers that are not strings (e.g. numbers) are converted with str().
+
+    Args:
+        df: pandas DataFrame
+        required_columns: headers of the columns where a value is required
+        location_of_sheet: description of the sheet's origin, used as the beginning of the error messages
+
+    Returns:
+        prepared DataFrame
+
+    Raises:
+        ValueError: if a required column is missing, or if no row is left after the cleaning
+    """
+    has_content = regex.compile(r"[\w\p{L}]", flags=regex.U)  # finds the cells that contain a real value
+    # normalize the headers, so that the script doesn't break when they vary a bit or when they are not strings
+    new_df = df.rename(columns=lambda x: str(x).strip().lower())
+    required_columns = [x.strip().lower() for x in required_columns]
+    new_df = new_df.applymap(lambda x: str(x).strip() if pd.notna(x) and has_content.search(str(x)) else "")
+    for req in required_columns:
+        if req not in new_df:
+            raise ValueError(f"{location_of_sheet} requires a column named '{req}'")
+        # all cells are strings by now, so a plain content check is enough to drop the rows without a value
+        new_df = new_df[[bool(has_content.search(x)) for x in new_df[req]]]
+    if len(new_df) < 1:
+        raise ValueError(f"{location_of_sheet} requires at least one row")
+    return new_df
+
+
+def simplify_name(value: str) -> str:
+    """
+    Turns an arbitrary value into a string that can serve as node name. The result consists only of ASCII letters,
+    digits, and dashes.
+
+    Args:
+        value: the value that should be simplified (it is converted to a string first)
+
+    Returns:
+        the simplified value
+    """
+    # lowercase first, then decompose the characters (e.g. ä becomes a, followed by a combining diaeresis),
+    # so that the base character survives the final clean-up, while the combining mark is deleted by it
+    decomposed = unicodedata.normalize("NFKD", str(value).lower())
+
+    # every run of forward slashes and/or whitespace characters collapses into one single dash
+    with_dashes = regex.sub("[/\\s]+", "-", decomposed)
+
+    # whatever is not an ASCII letter, a digit, or a dash is deleted
+    only_allowed_chars = regex.sub("[^A-Za-z0-9\\-]+", "", with_dashes)
+
+    return only_allowed_chars
+
+
+def check_notna(value: Optional[Any]) -> bool:
+    """
+    Decides whether a value is usable in the context of data archiving. Usable values are:
+     - booleans
+     - numbers (integers and floats, except np.nan)
+     - strings that contain at least one Unicode letter, underscore, or digit, unless the string is "None",
+       "<NA>", "N/A", or "-" (upper/lowercase doesn't matter)
+     - PropertyElements whose "value" is usable according to the above criteria
+    Everything else (None, pd.NA, lists, arrays, ...) is considered unusable.
+
+    Args:
+        value: any object encountered when analysing data
+
+    Returns:
+        True if the value is usable, False if it is N/A or otherwise unusable
+    """
+    # a PropertyElement is judged by the value it wraps
+    candidate = value.value if isinstance(value, PropertyElement) else value
+
+    if isinstance(candidate, str):
+        contains_word_char = regex.search(r"\p{L}|\d|_", candidate, flags=regex.UNICODE) is not None
+        is_placeholder = regex.search(r"^(none|<NA>|-|n/a)$", candidate, flags=regex.IGNORECASE) is not None
+        return contains_word_char and not is_placeholder
+
+    # bool is a subclass of int, so this check covers the booleans, too
+    if isinstance(candidate, int):
+        return True
+
+    if isinstance(candidate, float):
+        return bool(pd.notna(candidate))   # necessary because isinstance(np.nan, float)
+    return False
diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py
index 9f761f427..65a0fa146 100644
--- a/knora/dsplib/utils/xml_upload.py
+++ b/knora/dsplib/utils/xml_upload.py
@@ -22,7 +22,7 @@
 from knora.dsplib.models.xmlpermission import XmlPermission
 from knora.dsplib.models.xmlproperty import XMLProperty
 from knora.dsplib.models.xmlresource import XMLResource
-from knora.dsplib.utils.shared_methods import try_network_action, validate_xml_against_schema
+from knora.dsplib.utils.shared import try_network_action, validate_xml_against_schema
 
 
 def _remove_circular_references(resources: list[XMLResource], verbose: bool) -> \
@@ -382,8 +382,8 @@ def _upload_resources(
                 print(err.message)
                 failed_uploads.append(resource.id)
                 continue
-            bitstream_size_uploaded_mb += round(next(bitstream_all_sizes_iterator), 1)
-            print(f"Uploaded file '{resource.bitstream.value}' ({bitstream_size_uploaded_mb} MB / {bitstream_size_total_mb} MB)")
+            bitstream_size_uploaded_mb += next(bitstream_all_sizes_iterator)
+            print(f"Uploaded file '{resource.bitstream.value}' ({bitstream_size_uploaded_mb:.1f} MB / {bitstream_size_total_mb} MB)")
             internal_file_name_bitstream = img['uploadedFiles'][0]['internalFilename']
             resource_bitstream = resource.get_bitstream(internal_file_name_bitstream, permissions_lookup)
 
diff --git a/knora/excel2xml.py b/knora/excel2xml.py
index 0a41a70a4..af6e79a0e 100644
--- a/knora/excel2xml.py
+++ b/knora/excel2xml.py
@@ -15,7 +15,8 @@
 import dataclasses
 
 from knora.dsplib.models.helpers import BaseError
-from knora.dsplib.utils.excel_to_json_lists import simplify_name
+from knora.dsplib.models.propertyelement import PropertyElement
+from knora.dsplib.utils.shared import simplify_name, check_notna
 
 ##############################
 # global variables and classes
@@ -26,47 +27,6 @@
 }
 
 
-@dataclasses.dataclass(frozen=True)
-class PropertyElement:
-    """
-    A PropertyElement object carries more information about a property value than the value itself.
-    The "value" is the value that could be passed to a method as plain string/int/float/bool. Use a PropertyElement
-    instead to define more precisely what attributes your <text> tag (for example) will have.
-
-    Args:
-        value: This is the content that will be written between the <text></text> tags (for example)
-        permissions: This is the permissions that your <text> tag (for example) will have
-        comment: This is the comment that your <text> tag (for example) will have
-        encoding: For <text> tags only. Can be "xml" or "utf8".
-
-    Examples:
-        See the difference between the first and the second example:
-
-        >>> make_text_prop(":testproperty", "first text")
-                <text-prop name=":testproperty">
-                    <text encoding="utf8" permissions="prop-default">
-                        first text
-                    </text>
-                </text-prop>
-        >>> make_text_prop(":testproperty", PropertyElement("first text", permissions="prop-restricted", encoding="xml"))
-                <text-prop name=":testproperty">
-                    <text encoding="xml" permissions="prop-restricted">
-                        first text
-                    </text>
-                </text-prop>
-    """
-    value: Union[str, int, float, bool]
-    permissions: str = "prop-default"
-    comment: Optional[str] = None
-    encoding: Optional[str] = None
-
-    def __post_init__(self) -> None:
-        if not check_notna(self.value):
-            raise BaseError(f"'{self.value}' is not a valid value for a PropertyElement")
-        if self.encoding not in ["utf8", "xml", None]:
-            raise BaseError(f"'{self.encoding}' is not a valid encoding for a PropertyElement")
-
-
 ###########
 # functions
 ###########
@@ -269,39 +229,6 @@ def find_date_in_string(string: str, calling_resource: str = "") -> Optional[str
         return None
 
 
-def check_notna(value: Optional[Any]) -> bool:
-    """
-    Check a value if it is usable in the context of data archiving. A value is considered usable if it is
-     - a number (integer or float, but not np.nan)
-     - a boolean
-     - a string with at least one Unicode letter, underscore, or number, but not "None", "<NA>", "N/A", or "-"
-     - a PropertyElement whose "value" fulfills the above criteria
-
-    Args:
-        value: any object encountered when analysing data
-
-    Returns:
-        True if the value is usable, False if it is N/A or otherwise unusable
-    """
-
-    if isinstance(value, PropertyElement):
-        value = value.value
-
-    if any([
-        isinstance(value, int),
-        isinstance(value, float) and pd.notna(value),   # necessary because isinstance(np.nan, float)
-        isinstance(value, bool)
-    ]):
-        return True
-    elif isinstance(value, str):
-        return all([
-            regex.search(r"\p{L}|\d|_", value, flags=re.UNICODE),
-            not bool(re.search(r"^(none|<NA>|-|n/a)$", value, flags=re.IGNORECASE))
-        ])
-    else:
-        return False
-
-
 def _check_and_prepare_values(
     value: Optional[Union[PropertyElement, str, int, float, bool]],
     values: Optional[Iterable[Union[PropertyElement, str, int, float, bool]]],
diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py
index 281687402..f20f4d991 100644
--- a/test/e2e/test_tools.py
+++ b/test/e2e/test_tools.py
@@ -6,8 +6,8 @@
 import re
 
 from knora.dsplib.utils import excel_to_json_lists
-from knora.dsplib.utils.excel_to_json_properties import properties_excel2json
-from knora.dsplib.utils.excel_to_json_resources import resources_excel2json
+from knora.dsplib.utils.excel_to_json_properties import excel2properties
+from knora.dsplib.utils.excel_to_json_resources import excel2resources
 from knora.dsplib.utils.id_to_iri import id_to_iri
 from knora.dsplib.utils.onto_create_ontology import create_project
 from knora.dsplib.utils.onto_get import get_ontology
@@ -157,16 +157,16 @@ def test_get(self) -> None:
         self.assertEqual(excel_list.get('comments'), excel_list_out.get('comments'))
 
     def test_excel_to_json_list(self) -> None:
-        excel_to_json_lists.list_excel2json(excelfolder='testdata/multilingual_lists',
-                                            outfile='testdata/tmp/_lists-out.json')
+        excel_to_json_lists.excel2lists(excelfolder='testdata/lists_multilingual',
+                                        path_to_output_file='testdata/tmp/_lists-out.json')
 
     def test_excel_to_json_resources(self) -> None:
-        resources_excel2json(excelfile='testdata/Resources.xlsx',
-                             outfile='testdata/tmp/_out_resources.json')
+        excel2resources(excelfile='testdata/Resources.xlsx',
+                        path_to_output_file='testdata/tmp/_out_resources.json')
 
     def test_excel_to_json_properties(self) -> None:
-        properties_excel2json(excelfile='testdata/Properties.xlsx',
-                              outfile='testdata/tmp/_out_properties.json')
+        excel2properties(excelfile='testdata/Properties.xlsx',
+                         path_to_output_file='testdata/tmp/_out_properties.json')
 
     def test_create_project(self) -> None:
         result1 = create_project(
diff --git a/test/unittests/test_excel2xml.py b/test/unittests/test_excel2xml.py
index 6fd104012..a897e4b01 100644
--- a/test/unittests/test_excel2xml.py
+++ b/test/unittests/test_excel2xml.py
@@ -141,18 +141,6 @@ def test_make_xsd_id_compatible(self) -> None:
         self.assertRaises(BaseError, excel2xml.make_xsd_id_compatible, ".")
 
 
-    def test_check_notna(self) -> None:
-        na_values = [None, pd.NA, np.nan, "", "  ", "-", ",", ".", "*", "!", " ⳰", " ῀ ", " ῾ ", " \n\t ", "N/A", "n/a",
-                     "<NA>", ["a", "b"], pd.array(["a", "b"]), np.array([0, 1])]
-        for na_value in na_values:
-            self.assertFalse(excel2xml.check_notna(na_value), msg=f"Failed na_value: {na_value}")
-
-        notna_values = [1, 0.1, True, False, "True", "False", r" \n\t ", "0", "_", "Ὅμηρος"]
-        notna_values.extend([excel2xml.PropertyElement(x) for x in notna_values])
-        for notna_value in notna_values:
-            self.assertTrue(excel2xml.check_notna(notna_value), msg=f"Failed notna_value: {notna_value}")
-
-
     def test_find_date_in_string(self) -> None:
 
         # template: 2021-01-01 | 2015_01_02
diff --git a/test/unittests/test_excel_to_json_lists.py b/test/unittests/test_excel_to_json_lists.py
index 4d9714112..d1feb05a4 100644
--- a/test/unittests/test_excel_to_json_lists.py
+++ b/test/unittests/test_excel_to_json_lists.py
@@ -1,4 +1,5 @@
 """unit tests for Excel to JSON list"""
+import copy
 import os
 import unittest
 import json
@@ -6,6 +7,7 @@
 import jsonpath_ng.ext
 import pandas as pd
 import regex
+from typing import Any
 
 from knora.dsplib.models.helpers import BaseError
 from knora.dsplib.utils import excel_to_json_lists as e2l
@@ -25,20 +27,95 @@ def tearDownClass(cls) -> None:
             os.remove('testdata/tmp/' + file)
         os.rmdir('testdata/tmp')
 
-    def test_excel2jsonlist(self) -> None:
+
+    def test_expand_lists_from_excel(self) -> None:
+        """Checks expand_lists_from_excel() against the stored expectation, with and without Excel references."""
+        # take the "lists" section of the systematic test project, expand it, and compare it to the expanded version
+        with open("testdata/test-project-systematic.json") as f:
+            lists_with_excel_reference = json.load(f)["project"]["lists"]
+        lists_with_excel_reference_output, success1 = e2l.expand_lists_from_excel(lists_with_excel_reference)
+        with open("testdata/lists_section_expanded.json") as f:
+            lists_with_excel_reference_output_expected = json.load(f)["expanded lists section of test-project-systematic.json"]
+        self.assertTrue(success1)
+        self.assertListEqual(lists_with_excel_reference_output, lists_with_excel_reference_output_expected)
+
+        # hand over a deep copy of the expanded version, so that an in-place modification cannot go unnoticed
+        lists_without_excel_reference = copy.deepcopy(lists_with_excel_reference_output_expected)
+        lists_without_excel_reference_output, success2 = e2l.expand_lists_from_excel(lists_without_excel_reference)
+        self.assertTrue(success2)
+        self.assertListEqual(lists_with_excel_reference_output_expected, lists_without_excel_reference_output)
+
+
+    def test_make_json_lists_from_excel(self) -> None:
+        """Compares the lists made from the "de", "en", and "fr" Excel files with the stored expected JSON."""
+        actual_lists = e2l._make_json_lists_from_excel([f"testdata/lists_multilingual/{lang}.xlsx" for lang in ("de", "en", "fr")])
+        with open("testdata/lists_multilingual_output_expected.json") as expected_file:
+            expected_lists = json.load(expected_file)
+        self.assertListEqual(actual_lists, expected_lists)
+
+
+    def test_validate_lists_section_with_schema(self) -> None:
+        """Checks that a valid "lists" section passes, and that invalid sections or invalid calls raise a BaseError."""
+        with open("testdata/lists_multilingual_output_expected.json") as f:
+            lists_section_valid = json.load(f)
+        # validate the valid "lists" section in a correct way
+        self.assertTrue(e2l.validate_lists_section_with_schema(lists_section=lists_section_valid))
+
+        # remove mandatory "comments" section from root node
+        lists_section_without_comment_at_rootnode = copy.deepcopy(lists_section_valid)
+        del lists_section_without_comment_at_rootnode[0]["comments"]
+        with self.assertRaisesRegex(
+            BaseError,
+            "\"lists\" section did not pass validation. The error message is: 'comments' is a required property"
+        ):
+            e2l.validate_lists_section_with_schema(lists_section=lists_section_without_comment_at_rootnode)
+
+        # add a comment under the invalid language key "eng" to the root node
+        lists_section_with_invalid_lang = copy.deepcopy(lists_section_valid)
+        lists_section_with_invalid_lang[0]["comments"]["eng"] = "wrong English label"
+        with self.assertRaisesRegex(
+            BaseError,
+            "\"lists\" section did not pass validation. The error message is: 'eng' does not match any of the regexes"
+        ):
+            e2l.validate_lists_section_with_schema(lists_section=lists_section_with_invalid_lang)
+
+        # wrong usage of the method: first with both arguments at once, then without any argument
+        with self.assertRaisesRegex(
+            BaseError,
+            "Validation of the 'lists' section works only if exactly one of the two arguments is given."
+        ):
+            e2l.validate_lists_section_with_schema(
+                path_to_json_project_file="testdata/test-project-systematic.json",
+                lists_section=lists_section_valid
+            )
+        with self.assertRaisesRegex(
+            BaseError,
+            "Validation of the 'lists' section works only if exactly one of the two arguments is given."
+        ):
+            e2l.validate_lists_section_with_schema()
+
+        # pass a file that doesn't have a "lists" section
+        with self.assertRaisesRegex(BaseError, "there is no \"lists\" section"):
+            e2l.validate_lists_section_with_schema(path_to_json_project_file="testdata/test-project-minimal.json")
+
+
+    def test_excel2lists(self) -> None:
         for mode in ["monolingual", "multilingual"]:
             # create output files
-            input_df = pd.read_excel(f"testdata/{mode}_lists/de.xlsx", header=None, dtype='str')
+            input_df = pd.read_excel(f"testdata/lists_{mode}/de.xlsx", header=None, dtype='str')
             input_df = input_df.applymap(lambda x: x if pd.notna(x) and regex.search(r"\p{L}", str(x), flags=regex.UNICODE) else pd.NA)
             input_df.dropna(axis="index", how="all", inplace=True)
-            excelfolder = f"testdata/{mode}_lists"
+            excelfolder = f"testdata/lists_{mode}"
             outfile = f"testdata/tmp/lists_output_{mode}.json"
-            e2l.list_excel2json(excelfolder=excelfolder, outfile=outfile)
+            output_from_method = e2l.excel2lists(excelfolder=excelfolder, path_to_output_file=outfile)
 
-            # check that the output file has the same number of nodes than the Excel file has rows
+            # check that output from file and from method are equal
             with open(outfile) as f:
-                output_as_dict = json.load(f)
-            output_nodes_matches = jsonpath_ng.parse('$..name').find(output_as_dict)
+                output_from_file: list[dict[str, Any]] = json.load(f)
+            self.assertListEqual(output_from_file, output_from_method)
+
+            # check that the output file has the same number of nodes as the Excel file has rows
+            output_nodes_matches = jsonpath_ng.parse('$..name').find(output_from_file)
             self.assertTrue(
                 len(input_df.index) == len(output_nodes_matches),
                 f"The output JSON file doesn't have the same number of nodes than the Excel file has rows"
@@ -55,7 +132,7 @@ def test_excel2jsonlist(self) -> None:
                 parser_string = '$'
                 for elem in jsonpath_elems:
                     parser_string = parser_string + f'.nodes[?(@.labels.en == "{elem}")]'
-                node_match = jsonpath_ng.ext.parse(parser_string).find(output_as_dict)
+                node_match = jsonpath_ng.ext.parse(parser_string).find(output_from_file)
                 self.assertTrue(
                     len(node_match) == 1,
                     f'The node "{jsonpath_elems[-1]}" from Excel row {index+1} was not correctly translated to the '
@@ -64,9 +141,9 @@ def test_excel2jsonlist(self) -> None:
 
         # make sure that the invalid lists raise an Error
         with self.assertRaisesRegex(BaseError, r"Found duplicate in column 2, row 9"):
-            e2l.list_excel2json(excelfolder="testdata/invalid_lists_1", outfile=outfile)
+            e2l.excel2lists(excelfolder="testdata/lists_invalid_1", path_to_output_file=outfile)
         with self.assertRaisesRegex(BaseError, r"The Excel file with the language code 'de' should have a value in row 10, column 2"):
-            e2l.list_excel2json(excelfolder="testdata/invalid_lists_2", outfile=outfile)
+            e2l.excel2lists(excelfolder="testdata/lists_invalid_2", path_to_output_file=outfile)
 
 
 if __name__ == '__main__':
diff --git a/test/unittests/test_excel_to_properties.py b/test/unittests/test_excel_to_json_properties.py
similarity index 87%
rename from test/unittests/test_excel_to_properties.py
rename to test/unittests/test_excel_to_json_properties.py
index ebe6e7b00..d9d4c3b5e 100644
--- a/test/unittests/test_excel_to_properties.py
+++ b/test/unittests/test_excel_to_json_properties.py
@@ -4,6 +4,7 @@
 import json
 import jsonpath_ng
 import jsonpath_ng.ext
+from typing import Any
 
 from knora.dsplib.utils import excel_to_json_properties as e2j
 
@@ -22,10 +23,10 @@ def tearDownClass(cls) -> None:
             os.remove('testdata/tmp/' + file)
         os.rmdir('testdata/tmp')
 
-    def test_excel2json(self) -> None:
+    def test_excel2properties(self) -> None:
         excelfile = "testdata/Properties.xlsx"
         outfile = "testdata/tmp/_out_properties.json"
-        e2j.properties_excel2json(excelfile, outfile)
+        output_from_method = e2j.excel2properties(excelfile, outfile)
 
         # define the expected values from the excel file
         excel_names = ["correspondsToGenericAnthroponym", "hasAnthroponym", "hasGender", "isDesignatedAs", "hasTitle",
@@ -76,16 +77,17 @@ def test_excel2json(self) -> None:
 
         # read json file
         with open(outfile) as f:
-            json_string = f.read()
-            json_string = "{" + json_string + "}"
-            json_file = json.loads(json_string)
+            output_from_file: list[dict[str, Any]] = json.load(f)
+
+        # check that output from file and from method are equal
+        self.assertListEqual(output_from_file, output_from_method)
 
         # extract infos from json file
-        json_names = [match.value for match in jsonpath_ng.parse("$.properties[*].name").find(json_file)]
-        json_supers = [match.value for match in jsonpath_ng.parse("$.properties[*].super").find(json_file)]
-        json_objects = [match.value for match in jsonpath_ng.parse("$.properties[*].object").find(json_file)]
+        json_names = [match.value for match in jsonpath_ng.parse("$[*].name").find(output_from_file)]
+        json_supers = [match.value for match in jsonpath_ng.parse("$[*].super").find(output_from_file)]
+        json_objects = [match.value for match in jsonpath_ng.parse("$[*].object").find(output_from_file)]
 
-        json_labels_all = [match.value for match in jsonpath_ng.parse("$.properties[*].labels").find(json_file)]
+        json_labels_all = [match.value for match in jsonpath_ng.parse("$[*].labels").find(output_from_file)]
         json_labels: dict[str, list[str]] = dict()
         for lang in ["de", "it"]:
             json_labels[lang] = [label.get(lang, "").strip() for label in json_labels_all]
@@ -93,13 +95,13 @@ def test_excel2json(self) -> None:
         json_comments: dict[str, list[str]] = dict()
         for lang in ["fr", "it"]:
             json_comments[f"comment_{lang}"] = [resource.get("comments", {}).get(lang, "").strip()
-                                               for resource in json_file["properties"]]
+                                               for resource in output_from_file]
 
-        json_gui_elements = [match.value for match in jsonpath_ng.parse("$.properties[*].gui_element").find(json_file)]
+        json_gui_elements = [match.value for match in jsonpath_ng.parse("$[*].gui_element").find(output_from_file)]
 
-        json_gui_attributes_hasGender = jsonpath_ng.ext.parse("$.properties[?name='hasGender'].gui_attributes").find(json_file)[0].value
-        json_gui_attributes_hasGND = jsonpath_ng.ext.parse("$.properties[?name='hasGND'].gui_attributes").find(json_file)[0].value
-        json_gui_attributes_hasDecimal = jsonpath_ng.ext.parse("$.properties[?name='hasDecimal'].gui_attributes").find(json_file)[0].value
+        json_gui_attributes_hasGender = jsonpath_ng.ext.parse("$[?name='hasGender'].gui_attributes").find(output_from_file)[0].value
+        json_gui_attributes_hasGND = jsonpath_ng.ext.parse("$[?name='hasGND'].gui_attributes").find(output_from_file)[0].value
+        json_gui_attributes_hasDecimal = jsonpath_ng.ext.parse("$[?name='hasDecimal'].gui_attributes").find(output_from_file)[0].value
 
         # make checks
         self.assertListEqual(excel_names, json_names)
diff --git a/test/unittests/test_excel_to_resource.py b/test/unittests/test_excel_to_json_resources.py
similarity index 67%
rename from test/unittests/test_excel_to_resource.py
rename to test/unittests/test_excel_to_json_resources.py
index 8a74d4d9a..a92add6c9 100644
--- a/test/unittests/test_excel_to_resource.py
+++ b/test/unittests/test_excel_to_json_resources.py
@@ -4,9 +4,7 @@
 import json
 import jsonpath_ng
 import jsonpath_ng.ext
-import pandas as pd
-import numpy as np
-
+from typing import Any
 from knora.dsplib.utils import excel_to_json_resources as e2j
 
 
@@ -24,31 +22,11 @@ def tearDownClass(cls) -> None:
             os.remove('testdata/tmp/' + file)
         os.rmdir('testdata/tmp')
 
-    def test_prepare_dataframe(self) -> None:
-        original_df = pd.DataFrame({
-             "  TitLE of Column 1 ": ["1",  " 0-1 ", "1-n ", pd.NA,  "    ", " ",    "",     " 0-n ", np.nan],
-             " Title of Column 2 ":  [None, "1",     1,      "text", "text", "text", "text", "text",  "text"],
-             "Title of Column 3":    ["",   pd.NA,   None,   "text", "text", "text", "text", np.nan,  "text"]
-        })
-        expected_df = pd.DataFrame({
-            "title of column 1":     [      "0-1", "1-n",                                  "0-n"],
-            "title of column 2":     [      "1",   "1",                                    "text"],
-            "title of column 3":     [      "",    "",                                     ""]
-        })
-        returned_df = e2j.prepare_dataframe(
-            df=original_df,
-            required_columns=["  TitLE of Column 1 ", " Title of Column 2 "],
-            location_of_sheet=''
-        )
-        for expected, returned in zip(expected_df.iterrows(), returned_df.iterrows()):
-            _, expected_row = expected
-            _, returned_row = returned
-            self.assertListEqual(list(expected_row), list(returned_row))
-
-    def test_excel2json(self) -> None:
+
+    def test_excel2resources(self) -> None:
         excelfile = "testdata/Resources.xlsx"
         outfile = "testdata/tmp/_out_resources.json"
-        e2j.resources_excel2json(excelfile, outfile)
+        output_from_method = e2j.excel2resources(excelfile, outfile)
 
         # define the expected values from the excel file
         excel_names = ["Owner", "Title", "GenericAnthroponym", "FamilyMember", "MentionedPerson", "Alias", "Image",
@@ -80,32 +58,33 @@ def test_excel2json(self) -> None:
 
         # read json file
         with open(outfile) as f:
-            json_string = f.read()
-            json_string = "{" + json_string + "}"
-            json_file = json.loads(json_string)
+            output_from_file: list[dict[str, Any]] = json.load(f)
 
+        # check that output from file and from method are equal
+        self.assertListEqual(output_from_file, output_from_method)
+            
         # extract infos from json file
-        json_names = [match.value for match in jsonpath_ng.parse("$.resources[*].name").find(json_file)]
-        json_supers = [match.value for match in jsonpath_ng.parse("$.resources[*].super").find(json_file)]
+        json_names = [match.value for match in jsonpath_ng.parse("$[*].name").find(output_from_file)]
+        json_supers = [match.value for match in jsonpath_ng.parse("$[*].super").find(output_from_file)]
 
-        json_labels_all = [match.value for match in jsonpath_ng.parse("$.resources[*].labels").find(json_file)]
+        json_labels_all = [match.value for match in jsonpath_ng.parse("$[*].labels").find(output_from_file)]
         json_labels: dict[str, list[str]] = dict()
         for lang in ["en", "rm"]:
             json_labels[lang] = [label.get(lang, "").strip() for label in json_labels_all]
-        json_labels_of_image = jsonpath_ng.ext.parse('$.resources[?name="Image"].labels').find(json_file)[0].value
+        json_labels_of_image = jsonpath_ng.ext.parse('$[?name="Image"].labels').find(output_from_file)[0].value
 
         json_comments: dict[str, list[str]] = dict()
         for lang in ["de", "fr"]:
             # make sure the lists of the json comments contain a blank string even if there is no "comments" section
             # at all in this resource
             json_comments[f"comment_{lang}"] = [resource.get("comments", {}).get(lang, "").strip()
-                                               for resource in json_file["resources"]]
-        json_comments_of_image = jsonpath_ng.ext.parse('$.resources[?name="Image"].comments').find(json_file)[0].value
+                                               for resource in output_from_file]
+        json_comments_of_image = jsonpath_ng.ext.parse('$[?name="Image"].comments').find(output_from_file)[0].value
 
         json_first_class_properties = [match.value for match in
-                                    jsonpath_ng.parse("$.resources[0].cardinalities[*].propname").find(json_file)]
+                                    jsonpath_ng.parse("$[0].cardinalities[*].propname").find(output_from_file)]
         json_first_class_cardinalities = [match.value for match in
-                                    jsonpath_ng.parse("$.resources[0].cardinalities[*].cardinality").find(json_file)]
+                                    jsonpath_ng.parse("$[0].cardinalities[*].cardinality").find(output_from_file)]
 
         # make checks
         self.assertListEqual(excel_names, json_names)
diff --git a/test/unittests/test_shared_methods.py b/test/unittests/test_shared_methods.py
new file mode 100644
index 000000000..26ce3f220
--- /dev/null
+++ b/test/unittests/test_shared_methods.py
@@ -0,0 +1,44 @@
+import unittest
+import pandas as pd
+import numpy as np
+from knora.dsplib.utils import shared
+from knora.dsplib.models.propertyelement import PropertyElement
+
+
+class TestSharedMethods(unittest.TestCase):
+    """Unit tests for the helper functions in knora.dsplib.utils.shared."""
+    def test_prepare_dataframe(self) -> None:
+        """Check that titles and cells are stripped, titles lowercased, and rows without required values dropped."""
+        original_df = pd.DataFrame({
+            "  TitLE of Column 1 ": ["1",  " 0-1 ", "1-n ", pd.NA,  "    ", " ",    "",     " 0-n ", np.nan],
+            " Title of Column 2 ":  [None, "1",     1,      "text", "text", "text", "text", "text",  "text"],
+            "Title of Column 3":    ["",   pd.NA,   None,   "text", "text", "text", "text", np.nan,  "text"]
+        })
+        expected_df = pd.DataFrame({
+            "title of column 1":     [      "0-1", "1-n",                                  "0-n"],
+            "title of column 2":     [      "1",   "1",                                    "text"],
+            "title of column 3":     [      "",    "",                                     ""]
+        })
+        returned_df = shared.prepare_dataframe(
+            df=original_df, required_columns=["  TitLE of Column 1 ", " Title of Column 2 "], location_of_sheet=''
+        )
+        # zip() stops at the shorter input, so a missing or surplus row must be caught by comparing lengths first
+        self.assertEqual(len(expected_df), len(returned_df), msg="Unexpected number of rows")
+        for (i, expected_row), (_, returned_row) in zip(expected_df.iterrows(), returned_df.iterrows()):
+            self.assertListEqual(list(expected_row), list(returned_row), msg=f"Failed in row {i}")
+
+    def test_check_notna(self) -> None:
+        """Check which values check_notna() treats as empty (falsy result) and which as usable (truthy result)."""
+        na_values = [None, pd.NA, np.nan, "", "  ", "-", ",", ".", "*", "!", " ⳰", " ῀ ", " ῾ ", " \n\t ", "N/A", "n/a",
+                     "<NA>", ["a", "b"], pd.array(["a", "b"]), np.array([0, 1])]
+        for na_value in na_values:
+            self.assertFalse(shared.check_notna(na_value), msg=f"Failed na_value: {na_value}")
+
+        notna_values = [1, 0.1, True, False, "True", "False", r" \n\t ", "0", "_", "Ὅμηρος"]
+        notna_values.extend([PropertyElement(x) for x in notna_values])
+        for notna_value in notna_values:
+            self.assertTrue(shared.check_notna(notna_value), msg=f"Failed notna_value: {notna_value}")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/testdata/single_list/de.xlsx b/testdata/list_single/de.xlsx
similarity index 100%
rename from testdata/single_list/de.xlsx
rename to testdata/list_single/de.xlsx
diff --git a/testdata/single_list/en.xlsx b/testdata/list_single/en.xlsx
similarity index 100%
rename from testdata/single_list/en.xlsx
rename to testdata/list_single/en.xlsx
diff --git a/testdata/invalid_lists_1/de.xlsx b/testdata/lists_invalid_1/de.xlsx
similarity index 100%
rename from testdata/invalid_lists_1/de.xlsx
rename to testdata/lists_invalid_1/de.xlsx
diff --git a/testdata/invalid_lists_1/en.xlsx b/testdata/lists_invalid_1/en.xlsx
similarity index 100%
rename from testdata/invalid_lists_1/en.xlsx
rename to testdata/lists_invalid_1/en.xlsx
diff --git a/testdata/invalid_lists_1/fr.xlsx b/testdata/lists_invalid_1/fr.xlsx
similarity index 100%
rename from testdata/invalid_lists_1/fr.xlsx
rename to testdata/lists_invalid_1/fr.xlsx
diff --git a/testdata/invalid_lists_2/de.xlsx b/testdata/lists_invalid_2/de.xlsx
similarity index 100%
rename from testdata/invalid_lists_2/de.xlsx
rename to testdata/lists_invalid_2/de.xlsx
diff --git a/testdata/invalid_lists_2/en.xlsx b/testdata/lists_invalid_2/en.xlsx
similarity index 100%
rename from testdata/invalid_lists_2/en.xlsx
rename to testdata/lists_invalid_2/en.xlsx
diff --git a/testdata/invalid_lists_2/fr.xlsx b/testdata/lists_invalid_2/fr.xlsx
similarity index 100%
rename from testdata/invalid_lists_2/fr.xlsx
rename to testdata/lists_invalid_2/fr.xlsx
diff --git a/testdata/monolingual_lists/de.xlsx b/testdata/lists_monolingual/de.xlsx
similarity index 100%
rename from testdata/monolingual_lists/de.xlsx
rename to testdata/lists_monolingual/de.xlsx
diff --git a/testdata/multilingual_lists/de.xlsx b/testdata/lists_multilingual/de.xlsx
similarity index 100%
rename from testdata/multilingual_lists/de.xlsx
rename to testdata/lists_multilingual/de.xlsx
diff --git a/testdata/multilingual_lists/en.xlsx b/testdata/lists_multilingual/en.xlsx
similarity index 100%
rename from testdata/multilingual_lists/en.xlsx
rename to testdata/lists_multilingual/en.xlsx
diff --git a/testdata/multilingual_lists/fr.xlsx b/testdata/lists_multilingual/fr.xlsx
similarity index 100%
rename from testdata/multilingual_lists/fr.xlsx
rename to testdata/lists_multilingual/fr.xlsx
diff --git a/testdata/lists_multilingual_output_expected.json b/testdata/lists_multilingual_output_expected.json
new file mode 100644
index 000000000..2a8c63e00
--- /dev/null
+++ b/testdata/lists_multilingual_output_expected.json
@@ -0,0 +1,102 @@
+[
+    {
+        "name": "first-list",
+        "labels": {
+            "fr": "première liste",
+            "en": "first list",
+            "de": "erste Liste"
+        },
+        "comments": {
+            "fr": "première liste",
+            "en": "first list",
+            "de": "erste Liste"
+        },
+        "nodes": [
+            {
+                "name": "special-characters-12-0-are-embedded",
+                "labels": {
+                    "fr": "caractères spéciales 1&2-%*_0 dedans",
+                    "en": "special characters 1&2-%*_0 are embedded",
+                    "de": "Spezialzeichen 1&2-%*_0 sind eingebettet"
+                },
+                "nodes": [
+                    {
+                        "name": "very",
+                        "labels": {
+                            "fr": "très",
+                            "en": "very",
+                            "de": "sehr"
+                        },
+                        "nodes": [
+                            {
+                                "name": "deeply",
+                                "labels": {
+                                    "fr": "profondément",
+                                    "en": "deeply",
+                                    "de": "tief"
+                                },
+                                "nodes": [
+                                    {
+                                        "name": "nested",
+                                        "labels": {
+                                            "fr": "niché!",
+                                            "en": "nested!",
+                                            "de": "verschachtelt!"
+                                        }
+                                    }
+                                ]
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    },
+    {
+        "name": "second-list",
+        "labels": {
+            "fr": "deuxième liste",
+            "en": "second list",
+            "de": "zweite Liste"
+        },
+        "comments": {
+            "fr": "deuxième liste",
+            "en": "second list",
+            "de": "zweite Liste"
+        },
+        "nodes": [
+            {
+                "name": "first-node",
+                "labels": {
+                    "fr": "premier noeud",
+                    "en": "first node",
+                    "de": "erster Knoten"
+                }
+            },
+            {
+                "name": "duplicate-nodename",
+                "labels": {
+                    "fr": "noeud doublé",
+                    "en": "duplicate nodename",
+                    "de": "Doppelung"
+                }
+            },
+            {
+                "name": "duplicate-nodename-2",
+                "labels": {
+                    "fr": "noeud doublé!",
+                    "en": "duplicate nodename!",
+                    "de": "Doppelung!"
+                }
+            },
+            {
+                "name": "duplicate-nodename-3",
+                "labels": {
+                    "fr": "noeud doublé?",
+                    "en": "duplicate nodename?",
+                    "de": "Doppelung?"
+                }
+            }
+        ]
+    }
+]
diff --git a/testdata/lists_section_expanded.json b/testdata/lists_section_expanded.json
new file mode 100644
index 000000000..c8dd4e063
--- /dev/null
+++ b/testdata/lists_section_expanded.json
@@ -0,0 +1,114 @@
+{
+    "expanded lists section of test-project-systematic.json": [
+        {
+            "name": "testlist",
+            "labels": {
+                "en": "Testlist",
+                "rm": "Glista test in Rumantsch"
+            },
+            "comments": {
+                "en": "no comment",
+                "de": "kein Kommentar",
+                "rm": "nagin commentar in Rumantsch"
+            },
+            "nodes": [
+                {
+                    "name": "first node of testlist",
+                    "labels": {
+                        "en": "First node of the Test-List",
+                        "rm": "Rumantsch"
+                    }
+                },
+                {
+                    "name": "second node of testlist",
+                    "labels": {
+                        "en": "Second node of the Test-List"
+                    },
+                    "nodes": [
+                        {
+                            "name": "first subnode",
+                            "labels": {
+                                "en": "First Sub-Node"
+                            }
+                        },
+                        {
+                            "name": "second subnode",
+                            "labels": {
+                                "en": "Second Sub-Node"
+                            }
+                        }
+                    ]
+                },
+                {
+                    "name": "third node of testlist",
+                    "labels": {
+                        "en": "Third node of the Test-List"
+                    }
+                }
+            ]
+        },
+        {
+            "name": "my-list-from-excel",
+            "labels": {
+                "en": "My list from Excel"
+            },
+            "comments": {
+                "en": "a comment",
+                "de": "ein Kommentar",
+                "fr": "un commentaire"
+            },
+            "nodes": [
+                {
+                    "name": "red",
+                    "labels": {
+                        "en": "red",
+                        "de": "rot"
+                    }
+                },
+                {
+                    "name": "yellow",
+                    "labels": {
+                        "en": "yellow",
+                        "de": "gelb"
+                    }
+                },
+                {
+                    "name": "blue",
+                    "labels": {
+                        "en": "blue",
+                        "de": "blau"
+                    }
+                },
+                {
+                    "name": "green",
+                    "labels": {
+                        "en": "green",
+                        "de": "grün"
+                    }
+                }
+            ]
+        },
+        {
+            "name": "notUsedList",
+            "labels": {
+                "en": "Not used list"
+            },
+            "comments": {
+                "en": "no comment",
+                "de": "kein Kommentar"
+            },
+            "nodes": [
+                {
+                    "name": "notUsedNode_1",
+                    "labels": {
+                        "en": "nodeLabel_1\"'"
+                    },
+                    "comments": {
+                        "en": "Nodes can have comments, too!",
+                        "rm": "Even in Rumantsch!"
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/testdata/test-project-systematic.json b/testdata/test-project-systematic.json
index 1c4f9656c..c8e0f659b 100644
--- a/testdata/test-project-systematic.json
+++ b/testdata/test-project-systematic.json
@@ -77,7 +77,7 @@
                     "fr": "un commentaire"
                 },
                 "nodes": {
-                    "folder": "testdata/single_list"
+                    "folder": "testdata/list_single"
                 }
             },
             {