diff --git a/Makefile b/Makefile index 6eaca66be..6a7f5f829 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ CURRENT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) .PHONY: dsp-stack dsp-stack: ## clone the dsp-api git repository and run the dsp-stack @mkdir -p .tmp - @git clone --branch main --single-branch --depth 1 https://github.com/dasch-swiss/dsp-api.git .tmp/dsp-stack + @git clone --branch v24.0.8 --single-branch https://github.com/dasch-swiss/dsp-api.git .tmp/dsp-stack $(MAKE) -C .tmp/dsp-stack env-file $(MAKE) -C .tmp/dsp-stack init-db-test $(MAKE) -C .tmp/dsp-stack stack-up @@ -51,7 +51,7 @@ install: ## install from source (runs setup.py) .PHONY: test test: dsp-stack ## run all tests located in the "test" folder (intended for local usage) - -pytest test/ + -pytest test/ # ignore errors, continue anyway with stack-down $(MAKE) stack-down .PHONY: test-no-stack @@ -60,7 +60,7 @@ test-no-stack: ## run all tests located in the "test" folder, without starting t .PHONY: test-end-to-end test-end-to-end: dsp-stack ## run e2e tests (intended for local usage) - -pytest test/e2e/ + -pytest test/e2e/ # ignore errors, continue anyway with stack-down $(MAKE) stack-down .PHONY: test-end-to-end-ci diff --git a/docs/assets/templates/lists/de.xlsx b/docs/assets/data_model_templates/lists/de.xlsx similarity index 100% rename from docs/assets/templates/lists/de.xlsx rename to docs/assets/data_model_templates/lists/de.xlsx diff --git a/docs/assets/templates/lists/en.xlsx b/docs/assets/data_model_templates/lists/en.xlsx similarity index 100% rename from docs/assets/templates/lists/en.xlsx rename to docs/assets/data_model_templates/lists/en.xlsx diff --git a/docs/assets/templates/properties_template.xlsx b/docs/assets/data_model_templates/onto_name (onto_label)/properties.xlsx similarity index 100% rename from docs/assets/templates/properties_template.xlsx rename to docs/assets/data_model_templates/onto_name (onto_label)/properties.xlsx diff --git 
a/docs/assets/templates/resources_template.xlsx b/docs/assets/data_model_templates/onto_name (onto_label)/resources.xlsx similarity index 100% rename from docs/assets/templates/resources_template.xlsx rename to docs/assets/data_model_templates/onto_name (onto_label)/resources.xlsx diff --git a/docs/dsp-tools-create.md b/docs/dsp-tools-create.md index 4d99d46e2..a82ebf8f3 100644 --- a/docs/dsp-tools-create.md +++ b/docs/dsp-tools-create.md @@ -437,8 +437,7 @@ To do so, it would be necessary to place the following two files into the folder ![Colors_en](./assets/images/img-list-english-colors.png) ![Farben_de](./assets/images/img-list-german-colors.png) -The expected format of the Excel files is documented -[here](./dsp-tools-excel.md#create-the-lists-section-of-a-json-project-file-from-excel-files). The only difference to +The expected format of the Excel files is documented [here](./dsp-tools-excel2json.md#lists-section). The only difference to the explanations there is that column A of the Excel worksheet is not interpreted as list name (root node), but as node name of the first children level below the root node. diff --git a/docs/dsp-tools-excel.md b/docs/dsp-tools-excel2json.md similarity index 75% rename from docs/dsp-tools-excel.md rename to docs/dsp-tools-excel2json.md index d162dca12..e019f77a7 100644 --- a/docs/dsp-tools-excel.md +++ b/docs/dsp-tools-excel2json.md @@ -1,22 +1,62 @@ [![PyPI version](https://badge.fury.io/py/dsp-tools.svg)](https://badge.fury.io/py/dsp-tools) -# Excel files for data modelling and data import +# `excel2json`: Create a data model (JSON project file) from Excel -dsp-tools is able to process Excel files and output the appropriate JSON or XML file. The JSON/XML file can then be -used to create the ontology on the DSP server or import data to the DSP repository. dsp-tools can also be used to -create a list from an Excel file. +With dsp-tools, a JSON project file can be created from Excel files. 
The command for this is documented +[here](./dsp-tools-usage.md#create-a-json-project-file-from-excel-files). +A JSON project consists of + - 0-1 "lists" sections + - 1-n ontologies, each containing + - 1 "properties" section + - 1 "resources" section +For each of these 3 sections, one or several Excel files are necessary. The Excel files and their format are described +below. If you want to convert the Excel files to JSON, it is possible to invoke a command for each of these sections +separately (as described below). -## JSON project file: "resources" section from Excel file +But it is more convenient to use the command that creates the entire JSON project file. In order to do so, put all +involved files into a folder with the following structure: +``` +data_model_files +|-- lists +| |-- de.xlsx +| `-- en.xlsx +`-- onto_name (onto_label) + |-- properties.xlsx + `-- resources.xlsx +``` + +Conventions for the folder names: + + - The "lists" folder must have exactly this name, if it exists. It can also be omitted. + - Replace "onto_name" by your ontology's name, and "onto_label" by your ontology's label. + - The only name that can be chosen freely is the name of the topmost folder ("data_model_files" in this example). + +Then, use the following command: +``` +dsp-tools excel2json data_model_files project.json +``` + +This will create a file `project.json` with the lists, properties, and resources from the Excel files. + +Please note that the "header" of the resulting JSON file is empty and thus invalid. It is necessary to add the project +shortcode, name, description, keywords, etc. by hand. + +Continue reading the following paragraphs to learn more about the expected structure of the Excel files. + + + + +## "resources" section With dsp-tools, the `resources` section used in a data model (JSON) can be created from an Excel file. The command for this is documented [here](./dsp-tools-usage.md#create-the-resources-section-of-a-json-project-file-from-an-excel-file). 
Only `XLSX` files are allowed. The `resources` section can be inserted into the ontology file and then be uploaded onto a DSP server. -**An Excel file template can be found [here](assets/templates/resources_template.xlsx). It is recommended to work from +**An Excel file template can be found [here](assets/data_model_templates/onto_name (onto_label)/resources.xlsx). It is recommended to work from the template.** The expected worksheets of the Excel file are: @@ -51,14 +91,14 @@ For further information about resources, see [here](./dsp-tools-create-ontologie -## JSON project file: "properties" section from Excel file +## "properties" section With dsp-tools, the `properties` section used in a data model (JSON) can be created from an Excel file. The command for this is documented [here](./dsp-tools-usage.md#create-the-properties-section-of-a-json-project-file-from-an-excel-file). Only the first worksheet of the Excel file is considered and only XLSX files are allowed. The `properties` section can be inserted into the ontology file and then be uploaded onto a DSP server. -**An Excel file template can be found [here](assets/templates/properties_template.xlsx). It is recommended to work +**An Excel file template can be found [here](assets/data_model_templates/onto_name (onto_label)/properties.xlsx). It is recommended to work from the template.** The Excel sheet must have the following structure: @@ -84,7 +124,7 @@ For further information about properties, see [here](./dsp-tools-create-ontologi -## JSON project file: "lists" section from Excel file(s) +## "lists" section With dsp-tools, the "lists" section of a JSON project file can be created from one or several Excel files. The lists can then be inserted into a JSON project file and uploaded to a DSP server. The command for this is documented @@ -116,8 +156,8 @@ Some notes: printed out if the list is not valid. 
**It is recommended to work from the following templates: -[en.xlsx](assets/templates/lists/en.xlsx): File with the English labels -[de.xlsx](assets/templates/lists/de.xlsx): File with the German labels** +[en.xlsx](assets/data_model_templates/lists/en.xlsx): File with the English labels +[de.xlsx](assets/data_model_templates/lists/de.xlsx): File with the German labels** The output of the above command, with the template files, is: @@ -190,37 +230,3 @@ The output of the above command, with the template files, is: ] } ``` - - - -## XML data file from Excel/CSV file - -There are two use cases for a transformation from Excel/CSV to XML: - - - The CLI command `dsp-tools excel2xml` creates an XML file from an Excel/CSV file which is already structured - according to the DSP specifications. This is mostly used for DaSCH-interal data migration. - - The module `excel2xml` can be imported into a custom Python script that transforms any tabular data into an XML. This - use case is more frequent, because data from research projects have a variety of formats/structures. The module - `excel2xml` is documented [here](./dsp-tools-excel2xml.md). - - -### CLI command `excel2xml` - -The command line tool is used as follows: -```bash -dsp-tools excel2xml data-source.xlsx 1234 shortname -``` - -There are no flags/options for this command. - -The Excel file must be structured as in this image: -![img-excel2xml.png](assets/images/img-excel2xml.png) - -Some notes: - - - The special tags ``, ``, and `` are represented as resources of restype `Annotation`, -`LinkObj`, and `Region`. - - The columns "ark", "iri", and "creation_date" are only used for DaSCH-internal data migration. - - If `file` is provided, but no `file permissions`, an attempt will be started to deduce them from the resource - permissions (`res-default` --> `prop-default` and `res-restricted` --> `prop-restricted`). If this attempt is not - successful, a `BaseError` will be raised. 
diff --git a/docs/dsp-tools-excel2xml.md b/docs/dsp-tools-excel2xml.md index 790da569f..9934b0948 100644 --- a/docs/dsp-tools-excel2xml.md +++ b/docs/dsp-tools-excel2xml.md @@ -1,11 +1,13 @@ [![PyPI version](https://badge.fury.io/py/dsp-tools.svg)](https://badge.fury.io/py/dsp-tools) -# `excel2xml`: Convert a data source to XML -dsp-tools assists you in converting a data source in CSV/XLS(X) format to an XML file. +# Module `excel2xml`: Convert a data source to XML -| **Hint** | -|-------------------------------------------------------------------------------------------------------------------------------------------| -| This page is about the **module** `excel2xml`. The CLI command is documented [here](dsp-tools-excel.md#xml-data-file-from-excelcsv-file). | +This page is about the module `excel2xml` that can be imported into a custom Python script that transforms any tabular +data into an XML. + +There is also a CLI command `dsp-tools excel2xml` that creates an XML file from an Excel/CSV file which is already +structured according to the DSP specifications. The CLI command is documented +[here](./dsp-tools-usage.md#use-the-module-excel2xml-to-convert-a-data-source-to-xml). To demonstrate the usage of the `excel2xml` module, there is a GitHub repository named `0123-import-scripts`. It contains: diff --git a/docs/dsp-tools-usage.md b/docs/dsp-tools-usage.md index 473699c87..277e4a9aa 100644 --- a/docs/dsp-tools-usage.md +++ b/docs/dsp-tools-usage.md @@ -32,13 +32,13 @@ dsp-tools create [options] project_definition.json The following options are available: -- `-s` | `--server` _server_: URL of the DSP server (default: 0.0.0.0:3333) -- `-u` | `--user` _username_: username used for authentication with the DSP API (default: root@example.com) -- `-p` | `--password` _password_: password used for authentication with the DSP API (default: test) -- `-V` | `--validate-only`: If set, only the validation of the JSON file is performed. 
-- `-l` | `--lists-only`: If set, only the lists are created. Please note that in this case the project must already exist. -- `-v` | `--verbose`: If set, more information about the progress is printed to the console. -- `-d` | `--dump`: If set, dump test files for DSP-API requests. +- `-s` | `--server` (optional, default: `0.0.0.0:3333`): URL of the DSP server +- `-u` | `--user` (optional, default: `root@example.com`): username used for authentication with the DSP API +- `-p` | `--password` (optional, default: `test`): password used for authentication with the DSP API +- `-V` | `--validate-only` (optional): If set, only the validation of the JSON file is performed. +- `-l` | `--lists-only` (optional): If set, only the lists are created. Please note that in this case the project must already exist. +- `-v` | `--verbose` (optional): If set, more information about the progress is printed to the console. +- `-d` | `--dump` (optional): If set, dump test files for DSP-API requests. The command is used to read the definition of a project with its data model(s) (provided in a JSON file) and create it on the DSP server. The following example shows how to upload the project defined in `project_definition.json` to the DSP @@ -61,12 +61,12 @@ dsp-tools get [options] output_file.json The following options are available: -- `-s` | `--server`: URL of the DSP server (default: 0.0.0.0:3333) -- `-u` | `--user`: username used for authentication with the DSP API (default: root@example.com) -- `-p` | `--password`: password used for authentication with the DSP API (default: test) -- `-P` | `--project`: shortcode, shortname or - [IRI](https://en.wikipedia.org/wiki/Internationalized_Resource_Identifier) of the project (mandatory) -- `-v` | `--verbose`: If set, some information about the progress is printed to the console. 
+- `-s` | `--server` (optional, default: `0.0.0.0:3333`): URL of the DSP server +- `-u` | `--user` (optional, default: `root@example.com`): username used for authentication with the DSP API +- `-p` | `--password` (optional, default: `test`): password used for authentication with the DSP API +- `-P` | `--project` (mandatory): shortcode, shortname or + [IRI](https://en.wikipedia.org/wiki/Internationalized_Resource_Identifier) of the project +- `-v` | `--verbose` (optional): If set, some information about the progress is printed to the console. The command is used to get the definition of a project with its data model(s) from a DSP server and write it into a JSON file. This JSON file can then be used to create the same project on another DSP server. The following example shows how @@ -131,21 +131,34 @@ to use this file to replace internal IDs in an existing XML file to reference ex -## Create the "lists" section of a JSON project file from Excel files +## Create a JSON project file from Excel files + +``` +dsp-tools excel2json data_model_files project.json +``` + +The expected file and folder structures are described [here](./dsp-tools-excel2json.md). + + + + +### Create the "lists" section of a JSON project file from Excel files ```bash -dsp-tools excel2lists folder output.json +dsp-tools excel2lists [options] folder output.json ``` -Arguments: - - `folder` (optional, default: "lists"): folder with the Excel file(s) - - `output.json` (optional, default: "lists.json"): Output file +The following options are available: + +- `-v` | `--verbose` (optional): If set, more information about the progress is printed to the console. -The expected Excel format is [documented here](./dsp-tools-excel.md#create-the-lists-section-of-a-json-project-file-from-excel-files). +The expected Excel format is [documented here](./dsp-tools-excel2json.md#lists-section). 
+**Tip: The command [`excel2json`](#create-a-json-project-file-from-excel-files) might be more convenient to use.** -## Create the "resources" section of a JSON project file from an Excel file + +### Create the "resources" section of a JSON project file from an Excel file ```bash dsp-tools excel2resources excel_file.xlsx output_file.json @@ -154,20 +167,14 @@ dsp-tools excel2resources excel_file.xlsx output_file.json The command is used to create the resources section of an ontology from an Excel file. Therefore, an Excel file has to be provided with the data in the first worksheet of the Excel file. -The following example shows how to create the resources section from an Excel file called `Resources.xlsx`. The output -is written to a file called `resources.json`. - -```bash -dsp-tools excel2resources Resources.xlsx resources.json -``` +The expected Excel format is [documented here](./dsp-tools-excel2json.md#resources-section). -More information about the usage of this command can be -found [here](./dsp-tools-excel.md#create-the-resources-for-a-data-model-from-an-excel-file). +**Tip: The command [`excel2json`](#create-a-json-project-file-from-excel-files) might be more convenient to use.** -## Create the "properties" section of a JSON project file from an Excel file +### Create the "properties" section of a JSON project file from an Excel file ```bash dsp-tools excel2properties excel_file.xlsx output_file.json @@ -176,32 +183,38 @@ dsp-tools excel2properties excel_file.xlsx output_file.json The command is used to create the properties section of an ontology from an Excel file. Therefore, an Excel file has to be provided with the data in the first worksheet of the Excel file. -The following example shows how to create the properties section from an Excel file called `Properties.xlsx`. The output -is written to a file called `properties.json`. 
- -```bash -dsp-tools excel2properties Properties.xlsx properties.json -``` +The expected Excel format is [documented here](./dsp-tools-excel2json.md#properties-section). -More information about the usage of this command can be found -[here](./dsp-tools-excel.md#create-the-properties-for-a-data-model-from-an-excel-file). +**Tip: The command [`excel2json`](#create-a-json-project-file-from-excel-files) might be more convenient to use.** ## Create an XML file from Excel/CSV + +If your data source is already structured according to the DSP specifications, but it is not in XML format yet, the +command `excel2xml` will transform it into XML. This is mostly used for DaSCH-internal data migration. + ```bash dsp-tools excel2xml data-source.xlsx project_shortcode ontology_name ``` Arguments: - - data-source.xlsx: An Excel/CSV file that is structured according to [these requirements](dsp-tools-excel.md#cli-command-excel2xml) - - project_shortcode: The four-digit hexadecimal shortcode of the project - - ontology_name: the name of the ontology that the data belongs to + - data-source.xlsx (mandatory): An Excel/CSV file that is structured as explained below + - project_shortcode (mandatory): The four-digit hexadecimal shortcode of the project + - ontology_name (mandatory): the name of the ontology that the data belongs to -If your data source is already structured according to the DSP specifications, but it is not in XML format yet, the -command `excel2xml` will transform it into XML. This is mostly used for DaSCH-interal data migration. There are no -flags/options for this command. The details of this command are documented [here](dsp-tools-excel.md#cli-command-excel2xml). +The Excel file must be structured as in this image: +![img-excel2xml.png](assets/images/img-excel2xml.png) + +Some notes: + + - The special tags `<annotation>`, `<link>`, and `<region>` are represented as resources of restype `Annotation`, +`LinkObj`, and `Region`. 
+ - The columns "ark", "iri", and "creation_date" are only used for DaSCH-internal data migration. + - If `file` is provided, but no `file permissions`, an attempt will be started to deduce them from the resource + permissions (`res-default` --> `prop-default` and `res-restricted` --> `prop-restricted`). If this attempt is not + successful, a `BaseError` will be raised. If your data source is not yet structured according to the DSP specifications, you need a custom Python script for the data transformation. For this, you might want to import the module `excel2xml` into your Python script, which is diff --git a/docs/index.md b/docs/index.md index 7e7774c7f..517b323a4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -20,6 +20,8 @@ dsp-tools helps you with the following tasks: a DSP server and writes it into a JSON file. - [`dsp-tools xmlupload`](./dsp-tools-usage.md#upload-data-to-a-dsp-server) uploads data from an XML file (bulk data import) and writes the mapping from internal IDs to IRIs into a local file. +- [`dsp-tools excel2json`](./dsp-tools-usage.md#create-a-json-project-file-from-excel-files) creates an entire JSON + project file from a folder with Excel files in it. - [`dsp-tools excel2lists`](./dsp-tools-usage.md#create-the-lists-section-of-a-json-project-file-from-excel-files) creates the "lists" section of a JSON project file from one or several Excel files. The resulting section can be integrated into a JSON project file and then be uploaded to a DSP server with `dsp-tools create`. 
diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py index 8ffc37a97..0b68b7f7d 100644 --- a/knora/dsp_tools.py +++ b/knora/dsp_tools.py @@ -9,6 +9,7 @@ from importlib.metadata import version from knora.dsplib.utils.excel_to_json_lists import excel2lists, validate_lists_section_with_schema +from knora.dsplib.utils.excel_to_json_project import excel2json from knora.dsplib.utils.excel_to_json_properties import excel2properties from knora.dsplib.utils.excel_to_json_resources import excel2resources from knora.dsplib.utils.id_to_iri import id_to_iri @@ -89,7 +90,17 @@ def program(user_args: list[str]) -> None: parser_upload.add_argument('-I', '--incremental', action='store_true', help='Incremental XML upload') parser_upload.add_argument('xmlfile', help='path to xml file containing the data', default='data.xml') - # excel + # excel2json + parser_excel2json = subparsers.add_parser( + 'excel2json', + help='Create a JSON project file from a folder containing the required Excel files (lists folder, ' + 'properties.xlsx, resources.xlsx)' + ) + parser_excel2json.set_defaults(action='excel2json') + parser_excel2json.add_argument('data_model_files', help='Path to the folder containing the Excel files') + parser_excel2json.add_argument('outfile', help='Path to the output JSON file') + + # excel2lists parser_excel_lists = subparsers.add_parser( 'excel2lists', help='Create the "lists" section of a JSON project file from one or multiple Excel files. 
If the list should ' @@ -98,26 +109,22 @@ def program(user_args: list[str]) -> None: ) parser_excel_lists.set_defaults(action='excel2lists') parser_excel_lists.add_argument('excelfolder', help='Path to the folder containing the Excel file(s)') - parser_excel_lists.add_argument('outfile', help='Path to the output JSON file containing the "lists" section', - default='lists.json') + parser_excel_lists.add_argument('outfile', help='Path to the output JSON file containing the "lists" section') + parser_excel_lists.add_argument('-v', '--verbose', action='store_true', help=verbose_text) # excel2resources parser_excel_resources = subparsers.add_parser('excel2resources', help='Create a JSON file from an Excel file ' 'containing resources for a DSP ontology. ') parser_excel_resources.set_defaults(action='excel2resources') - parser_excel_resources.add_argument('excelfile', help='Path to the Excel file containing the resources', - default='resources.xlsx') - parser_excel_resources.add_argument('outfile', help='Path to the output JSON file containing the resource data', - default='resources.json') + parser_excel_resources.add_argument('excelfile', help='Path to the Excel file containing the resources') + parser_excel_resources.add_argument('outfile', help='Path to the output JSON file containing the resource data') # excel2properties parser_excel_properties = subparsers.add_parser('excel2properties', help='Create a JSON file from an Excel file ' 'containing properties for a DSP ontology. 
') parser_excel_properties.set_defaults(action='excel2properties') - parser_excel_properties.add_argument('excelfile', help='Path to the Excel file containing the properties', - default='properties.xlsx') - parser_excel_properties.add_argument('outfile', help='Path to the output JSON file containing the properties data', - default='properties.json') + parser_excel_properties.add_argument('excelfile', help='Path to the Excel file containing the properties') + parser_excel_properties.add_argument('outfile', help='Path to the output JSON file containing the properties data') # id2iri parser_id2iri = subparsers.add_parser('id2iri', help='Replace internal IDs in an XML with their corresponding IRIs ' @@ -203,9 +210,13 @@ def program(user_args: list[str]) -> None: sipi=args.sipi, verbose=args.verbose, incremental=args.incremental) + elif args.action == 'excel2json': + excel2json(data_model_files=args.data_model_files, + path_to_output_file=args.outfile) elif args.action == 'excel2lists': excel2lists(excelfolder=args.excelfolder, - path_to_output_file=args.outfile) + path_to_output_file=args.outfile, + verbose=args.verbose) elif args.action == 'excel2resources': excel2resources(excelfile=args.excelfile, path_to_output_file=args.outfile) diff --git a/knora/dsplib/utils/excel_to_json_lists.py b/knora/dsplib/utils/excel_to_json_lists.py index f3b84ff00..f8531e54e 100644 --- a/knora/dsplib/utils/excel_to_json_lists.py +++ b/knora/dsplib/utils/excel_to_json_lists.py @@ -6,10 +6,10 @@ from typing import Any, Union, Optional, Tuple import jsonschema +import regex from openpyxl import load_workbook from openpyxl.cell import Cell from openpyxl.worksheet.worksheet import Worksheet -import regex from knora.dsplib.models.helpers import BaseError from knora.dsplib.utils.shared import simplify_name @@ -253,7 +253,8 @@ def validate_lists_section_with_schema( """ if bool(path_to_json_project_file) == bool(lists_section): raise BaseError("Validation of the 'lists' section works only if 
exactly one of the two arguments is given.") - with open("knora/dsplib/schemas/lists-only.json") as schema: + current_dir = os.path.dirname(os.path.realpath(__file__)) + with open(os.path.join(current_dir, "../schemas/lists-only.json")) as schema: lists_schema = json.load(schema) if path_to_json_project_file: @@ -297,24 +298,30 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]: return excel_file_paths -def excel2lists(excelfolder: str, path_to_output_file: Optional[str] = None) -> list[dict[str, Any]]: +def excel2lists( + excelfolder: str, + path_to_output_file: Optional[str] = None, + verbose: bool = False +) -> list[dict[str, Any]]: """ Converts lists described in Excel files into a "lists" section that can be inserted into a JSON project file. Args: excelfolder: path to the folder containing the Excel file(s) path_to_output_file: if provided, the output is written into this JSON file + verbose: verbose switch Returns: the "lists" section as Python list """ # read the data excel_file_paths = _extract_excel_file_paths(excelfolder) - print("The following Excel files will be processed:") - [print(f" - {filename}") for filename in excel_file_paths] + if verbose: + print("The following Excel files will be processed:") + [print(f" - {filename}") for filename in excel_file_paths] # construct the "lists" section - finished_lists = _make_json_lists_from_excel(excel_file_paths, verbose=True) + finished_lists = _make_json_lists_from_excel(excel_file_paths, verbose=verbose) validate_lists_section_with_schema(lists_section=finished_lists) # write final "lists" section diff --git a/knora/dsplib/utils/excel_to_json_project.py b/knora/dsplib/utils/excel_to_json_project.py new file mode 100644 index 000000000..0fbbc3d8d --- /dev/null +++ b/knora/dsplib/utils/excel_to_json_project.py @@ -0,0 +1,111 @@ +import json +import os +import re + +from knora.dsplib.models.helpers import BaseError +from knora.dsplib.utils.excel_to_json_lists import excel2lists +from 
knora.dsplib.utils.excel_to_json_properties import excel2properties +from knora.dsplib.utils.excel_to_json_resources import excel2resources + + +def excel2json( + data_model_files: str, + path_to_output_file: str +) -> None: + """ + Converts a folder containing Excel files into a JSON data model file. The folder must be structured like this: + + :: + + data_model_files + |-- lists + | |-- de.xlsx + | `-- en.xlsx + `-- onto_name (onto_label) + |-- properties.xlsx + `-- resources.xlsx + + The names of the files must be exactly like in the example. The folder "lists" can be missing, because it is + optional to have lists in a DSP project. Only XLSX files are allowed. + + Args: + data_model_files: path to the folder (called "data_model_files" in the example) + path_to_output_file: path to the file where the output JSON file will be saved + + Returns: + None + """ + + # validate input + # -------------- + if not os.path.isdir(data_model_files): + raise BaseError(f"ERROR: {data_model_files} is not a directory.") + folder = [x for x in os.scandir(data_model_files) if not re.search(r"^(\.|~\$).+", x.name)] + + processed_files = [] + onto_folders = [x for x in folder if os.path.isdir(x) and re.search(r"([\w.-]+) (\([\w.\- ]+\))", x.name)] + if len(onto_folders) == 0: + raise BaseError(f"'{data_model_files}' must contain at least one subfolder named after the pattern " + f"'onto_name (onto_label)'") + for onto_folder in onto_folders: + contents = sorted([x.name for x in os.scandir(onto_folder) if not re.search(r"^(\.|~\$).+", x.name)]) + if contents != ["properties.xlsx", "resources.xlsx"]: + raise BaseError(f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' " + f"and one file 'resources.xlsx', but nothing else.") + processed_files.extend([f"{data_model_files}/{onto_folder.name}/{file}" for file in contents]) + + listfolder = [x for x in folder if os.path.isdir(x) and x.name == "lists"] + if listfolder: + listfolder_contents = 
list(os.scandir(listfolder[0])) + if not all([re.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents]): + raise BaseError(f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, " + f"it.xlsx, rm.xlsx") + processed_files = [f"{data_model_files}/lists/{file.name}" for file in listfolder_contents] + processed_files + + if len(onto_folders) + len(listfolder) != len(folder): + raise BaseError(f"The only allowed subfolders in '{data_model_files}' are 'lists' and folders that match the " + f"pattern 'onto_name (onto_label)'") + + print(f"The following files will be processed:") + [print(f" - {file}") for file in processed_files] + + + # create output + # ------------- + lists = excel2lists(f"{data_model_files}/lists") if listfolder else None + + ontologies = [] + for onto_folder in onto_folders: + name, label = re.search(r"([\w.-]+) \(([\w.\- ]+)\)", onto_folder.name).groups() + ontologies.append({ + "name": name, + "label": label, + "properties": excel2properties(f"{data_model_files}/{onto_folder.name}/properties.xlsx"), + "resources": excel2resources(f"{data_model_files}/{onto_folder.name}/resources.xlsx") + }) + + project = { + "prefixes": { + "": "" + }, + "$schema": "https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/ontology.json", + "project": { + "shortcode": "", + "shortname": "", + "longname": "", + "descriptions": { + "en": "" + }, + "keywords": [ + "" + ] + } + } + if lists: + project["project"]["lists"] = lists + project["project"]["ontologies"] = ontologies + + with open(path_to_output_file, "w") as f: + json.dump(project, f, indent=4, ensure_ascii=False) + + print(f"JSON project file successfully saved at {path_to_output_file}") diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py index 78b800457..3f1c87897 100644 --- a/knora/dsplib/utils/excel_to_json_properties.py +++ b/knora/dsplib/utils/excel_to_json_properties.py 
@@ -1,11 +1,13 @@ import json +import os import re from typing import Any, Optional + import jsonschema import pandas as pd from knora.dsplib.models.helpers import BaseError -from knora.dsplib.utils.shared import prepare_dataframe +from knora.dsplib.utils.shared import prepare_dataframe, check_notna languages = ["en", "de", "fr", "it", "rm"] @@ -20,7 +22,8 @@ def _validate_properties_with_schema(properties_list: list[dict[str, Any]]) -> b Returns: True if the "properties" section passed validation. Otherwise, a BaseError with a detailed error report is raised. """ - with open("knora/dsplib/schemas/properties-only.json") as schema: + current_dir = os.path.dirname(os.path.realpath(__file__)) + with open(os.path.join(current_dir, "../schemas/properties-only.json")) as schema: properties_schema = json.load(schema) try: jsonschema.validate(instance=properties_list, schema=properties_schema) @@ -99,8 +102,15 @@ def excel2properties(excelfile: str, path_to_output_file: Optional[str] = None) df: pd.DataFrame = pd.read_excel(excelfile) df = prepare_dataframe( df=df, - required_columns=["name", "super", "object", "gui_element"], - location_of_sheet=f"File '{excelfile}'") + required_columns=["name"], + location_of_sheet=f"File '{excelfile}'" + ) + + required = ["super", "object", "gui_element"] + for index, row in df.iterrows(): + for req in required: + if not check_notna(row[req]): + raise BaseError(f"'{excelfile}' has a missing value in row {index + 2}, column '{req}'") # transform every row into a property props = [_row2prop(row, i, excelfile) for i, row in df.iterrows()] diff --git a/knora/dsplib/utils/excel_to_json_resources.py b/knora/dsplib/utils/excel_to_json_resources.py index af9615e03..24a55ad95 100644 --- a/knora/dsplib/utils/excel_to_json_resources.py +++ b/knora/dsplib/utils/excel_to_json_resources.py @@ -1,9 +1,12 @@ import json +import os from typing import Any, Optional + import jsonschema import pandas as pd + from knora.dsplib.models.helpers import 
BaseError -from knora.dsplib.utils.shared import prepare_dataframe +from knora.dsplib.utils.shared import prepare_dataframe, check_notna languages = ["en", "de", "fr", "it", "rm"] @@ -18,7 +21,8 @@ def _validate_resources_with_schema(resources_list: list[dict[str, Any]]) -> boo Returns: True if the "resources" section passed validation. Otherwise, a BaseError with a detailed error report is raised. """ - with open("knora/dsplib/schemas/resources-only.json") as schema: + current_dir = os.path.dirname(os.path.realpath(__file__)) + with open(os.path.join(current_dir, "../schemas/resources-only.json")) as schema: resources_schema = json.load(schema) try: jsonschema.validate(instance=resources_list, schema=resources_schema) @@ -92,10 +96,14 @@ def excel2resources(excelfile: str, path_to_output_file: Optional[str] = None) - all_classes_df: pd.DataFrame = pd.read_excel(excelfile) all_classes_df = prepare_dataframe( df=all_classes_df, - required_columns=["name", "super"], + required_columns=["name"], location_of_sheet=f"Sheet 'classes' in file '{excelfile}'" ) + for index, row in all_classes_df.iterrows(): + if not check_notna(row["super"]): + raise BaseError(f"Sheet 'classes' of '{excelfile}' has a missing value in row {index + 2}, column 'super'") + # transform every row into a resource resources = [_row2resource(row, excelfile) for i, row in all_classes_df.iterrows()] _validate_resources_with_schema(resources) diff --git a/mkdocs.yml b/mkdocs.yml index 44a908361..5082a4006 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -11,9 +11,9 @@ nav: - JSON project definition format: - Overview: dsp-tools-create.md - Ontologies: dsp-tools-create-ontologies.md - - excel2xml: dsp-tools-excel2xml.md - Bulk data import: dsp-tools-xmlupload.md - - Excel file processing: dsp-tools-excel.md + - excel2json: dsp-tools-excel2json.md + - excel2xml: dsp-tools-excel2xml.md - Information for developers: dsp-tools-information-for-developers.md - Changelog: changelog.md diff --git 
a/test/e2e/test_tools.py b/test/e2e/test_tools.py index 5da6c68de..ccbb9480e 100644 --- a/test/e2e/test_tools.py +++ b/test/e2e/test_tools.py @@ -4,25 +4,27 @@ can be called with the basic configuration that is available via CLI. More thorough testing of each method is done in separate unit tests/e2e tests. """ +import copy +import datetime import json -import unittest import os -import datetime import re +import unittest + import jsonpath_ng import jsonpath_ng.ext -import copy from knora.dsplib.utils.excel_to_json_lists import excel2lists, validate_lists_section_with_schema +from knora.dsplib.utils.excel_to_json_project import excel2json from knora.dsplib.utils.excel_to_json_properties import excel2properties from knora.dsplib.utils.excel_to_json_resources import excel2resources from knora.dsplib.utils.id_to_iri import id_to_iri -from knora.dsplib.utils.onto_create_ontology import create_project -from knora.dsplib.utils.onto_validate import validate_project from knora.dsplib.utils.onto_create_lists import create_lists +from knora.dsplib.utils.onto_create_ontology import create_project from knora.dsplib.utils.onto_get import get_ontology -from knora.dsplib.utils.xml_upload import xml_upload +from knora.dsplib.utils.onto_validate import validate_project from knora.dsplib.utils.shared import validate_xml_against_schema +from knora.dsplib.utils.xml_upload import xml_upload from knora.excel2xml import excel2xml @@ -294,6 +296,17 @@ def test_xml_upload(self) -> None: os.remove(id2iri_replaced_xml_filename) + def test_excel_to_json_project(self) -> None: + excel2json(data_model_files="testdata/excel2json_files", + path_to_output_file="testdata/tmp/_out_project.json") + with open("testdata/excel2json-expected-output.json") as f: + output_expected = json.load(f) + with open("testdata/tmp/_out_project.json") as f: + output = json.load(f) + self.assertDictEqual(output, output_expected) + os.remove("testdata/tmp/_out_project.json") + + def test_excel_to_json_list(self) -> 
None: excel2lists(excelfolder="testdata/lists_multilingual", path_to_output_file="testdata/tmp/_lists-out.json") @@ -302,14 +315,14 @@ def test_excel_to_json_list(self) -> None: def test_excel_to_json_resources(self) -> None: - excel2resources(excelfile="testdata/Resources.xlsx", + excel2resources(excelfile="testdata/excel2json_files/test-name (test_label)/resources.xlsx", path_to_output_file="testdata/tmp/_out_resources.json") self.assertTrue(os.path.isfile("testdata/tmp/_out_resources.json")) os.remove("testdata/tmp/_out_resources.json") def test_excel_to_json_properties(self) -> None: - excel2properties(excelfile="testdata/Properties.xlsx", + excel2properties(excelfile="testdata/excel2json_files/test-name (test_label)/properties.xlsx", path_to_output_file="testdata/tmp/_out_properties.json") self.assertTrue(os.path.isfile("testdata/tmp/_out_properties.json")) os.remove("testdata/tmp/_out_properties.json") diff --git a/test/unittests/test_excel_to_json_properties.py b/test/unittests/test_excel_to_json_properties.py index d9d4c3b5e..94722b4a8 100644 --- a/test/unittests/test_excel_to_json_properties.py +++ b/test/unittests/test_excel_to_json_properties.py @@ -1,10 +1,11 @@ """unit tests for excel to properties""" +import json import os import unittest -import json +from typing import Any + import jsonpath_ng import jsonpath_ng.ext -from typing import Any from knora.dsplib.utils import excel_to_json_properties as e2j @@ -24,7 +25,7 @@ def tearDownClass(cls) -> None: os.rmdir('testdata/tmp') def test_excel2properties(self) -> None: - excelfile = "testdata/Properties.xlsx" + excelfile = "testdata/excel2json_files/test-name (test_label)/properties.xlsx" outfile = "testdata/tmp/_out_properties.json" output_from_method = e2j.excel2properties(excelfile, outfile) @@ -35,7 +36,7 @@ def test_excel2properties(self) -> None: "hasInterval", "hasBoolean", "hasGeoname", "partOfDocument"] excel_supers = [["hasLinkTo"], ["hasValue", "dcterms:creator"], ["hasValue"], ["hasValue"], 
["hasLinkTo"], ["hasValue"], ["hasValue"], ["hasValue"], ["hasRepresentation"], - ["hasValue", "dcterms:description"], ["hasValue"],["hasValue"], ["hasColor"], ["hasValue"], + ["hasValue", "dcterms:description"], ["hasValue"], ["hasValue"], ["hasColor"], ["hasValue"], ["hasValue"], ["hasSequenceBounds"], ["hasValue"], ["hasValue"], ["isPartOf"]] excel_objects = [":GenericAnthroponym", "TextValue", "ListValue", "ListValue", ":Titles", "ListValue", "IntValue", "DateValue", "Representation", "TextValue", "DateValue", "UriValue", diff --git a/test/unittests/test_excel_to_json_resources.py b/test/unittests/test_excel_to_json_resources.py index a92add6c9..f2a4e81e4 100644 --- a/test/unittests/test_excel_to_json_resources.py +++ b/test/unittests/test_excel_to_json_resources.py @@ -1,10 +1,12 @@ """unit tests for excel to resource""" +import json import os import unittest -import json +from typing import Any + import jsonpath_ng import jsonpath_ng.ext -from typing import Any + from knora.dsplib.utils import excel_to_json_resources as e2j @@ -24,7 +26,7 @@ def tearDownClass(cls) -> None: def test_excel2resources(self) -> None: - excelfile = "testdata/Resources.xlsx" + excelfile = "testdata/excel2json_files/test-name (test_label)/resources.xlsx" outfile = "testdata/tmp/_out_resources.json" output_from_method = e2j.excel2resources(excelfile, outfile) diff --git a/testdata/Properties.xlsx b/testdata/Properties.xlsx deleted file mode 100644 index 9d5e8d918..000000000 Binary files a/testdata/Properties.xlsx and /dev/null differ diff --git a/testdata/Resources.xlsx b/testdata/Resources.xlsx deleted file mode 100644 index 88759a994..000000000 Binary files a/testdata/Resources.xlsx and /dev/null differ diff --git a/testdata/excel2json-expected-output.json b/testdata/excel2json-expected-output.json new file mode 100644 index 000000000..77b1b0ffb --- /dev/null +++ b/testdata/excel2json-expected-output.json @@ -0,0 +1,1078 @@ +{ + "prefixes": { + "": "" + }, + "$schema": 
"https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/ontology.json", + "project": { + "shortcode": "", + "shortname": "", + "longname": "", + "descriptions": { + "en": "" + }, + "keywords": [ + "" + ], + "lists": [ + { + "name": "first-list", + "labels": { + "fr": "première liste", + "en": "first list", + "de": "erste Liste" + }, + "comments": { + "fr": "première liste", + "en": "first list", + "de": "erste Liste" + }, + "nodes": [ + { + "name": "special-characters-12-0-are-embedded", + "labels": { + "fr": "caractères spéciales 1&2-%*_0 dedans", + "en": "special characters 1&2-%*_0 are embedded", + "de": "Spezialzeichen 1&2-%*_0 sind eingebettet" + }, + "nodes": [ + { + "name": "very", + "labels": { + "fr": "très", + "en": "very", + "de": "sehr" + }, + "nodes": [ + { + "name": "deeply", + "labels": { + "fr": "profondément", + "en": "deeply", + "de": "tief" + }, + "nodes": [ + { + "name": "nested", + "labels": { + "fr": "niché!", + "en": "nested!", + "de": "verschachtelt!" + } + } + ] + } + ] + } + ] + } + ] + }, + { + "name": "second-list", + "labels": { + "fr": "deuxième liste", + "en": "second list", + "de": "zweite Liste" + }, + "comments": { + "fr": "deuxième liste", + "en": "second list", + "de": "zweite Liste" + }, + "nodes": [ + { + "name": "first-node", + "labels": { + "fr": "premier noeud", + "en": "first node", + "de": "erster Knoten" + } + }, + { + "name": "duplicate-nodename", + "labels": { + "fr": "noeud doublé", + "en": "duplicate nodename", + "de": "Doppelung" + } + }, + { + "name": "duplicate-nodename-2", + "labels": { + "fr": "noeud doublé!", + "en": "duplicate nodename!", + "de": "Doppelung!" + } + }, + { + "name": "duplicate-nodename-3", + "labels": { + "fr": "noeud doublé?", + "en": "duplicate nodename?", + "de": "Doppelung?" 
+ } + } + ] + } + ], + "ontologies": [ + { + "name": "test-name", + "label": "test_label", + "properties": [ + { + "name": "correspondsToGenericAnthroponym", + "super": [ + "hasLinkTo" + ], + "object": ":GenericAnthroponym", + "labels": { + "en": "only English" + }, + "comments": { + "en": "I had already looked at several pieces of property when, one day, the notary, who had been giving me some necessary directions for one of my explorations, said to me:", + "de": "Ich hatte bereits mehrere Grundstücke besichtigt, als eines Tages der Notar, der mir die notwendigen Anweisungen für eine meiner Erkundungen gegeben hatte, zu mir sagte:", + "fr": "J'avais déjà examiné plusieurs propriétés quand, un jour, le notaire, qui me donnait des indications nécessaires pour une de mes explorations, me dit :", + "it": "Avevo già visto diverse proprietà quando un giorno il notaio,", + "rm": "Rumantsch" + }, + "gui_element": "Searchbox" + }, + { + "name": "hasAnthroponym", + "super": [ + "hasValue", + "dcterms:creator" + ], + "object": "TextValue", + "labels": { + "de": "only German" + }, + "comments": { + "en": "A strange chance put me in possession of this journal.", + "de": "Ein seltsamer Zufall brachte mich in den Besitz dieses Tagebuchs.", + "fr": "Un étrange hasard m'a mis en possession de ce journal.", + "it": "Uno strano caso mi mise in possesso di questo diario.", + "rm": "Rumantsch" + }, + "gui_element": "Richtext" + }, + { + "name": "hasGender", + "super": [ + "hasValue" + ], + "object": "ListValue", + "labels": { + "fr": "only French" + }, + "comments": { + "en": "I know nothing of it whatever; but if you would like to see it, monsieur, here are the precise directions how to find it.", + "de": "Ich weiß nichts davon, aber wenn Sie es sehen möchten, Monsieur, finden Sie hier die genaue Wegbeschreibung.", + "fr": "Je n'en sais rien du tout ; mais si vous voulez la voir, monsieur, voici les indications précises pour la trouver.", + "it": "Non ne so nulla; ma se volete 
vederla, signore, eccovi le indicazioni precise per trovarla.", + "rm": "Rumantsch" + }, + "gui_element": "List", + "gui_attributes": { + "hlist": "gender" + } + }, + { + "name": "isDesignatedAs", + "super": [ + "hasValue" + ], + "object": "ListValue", + "labels": { + "it": "only Italian" + }, + "comments": { + "en": "You will have to arrange the affair with the curé of the village of ——.\"", + "de": "Sie werden die Angelegenheit mit dem Pfarrer des Dorfes -- regeln müssen.\"", + "fr": "Vous devrez arranger l'affaire avec le curé du village de --.\"", + "it": "Dovrete organizzare l'affare con il curato del villaggio di --\".", + "rm": "Rumantsch" + }, + "gui_element": "Radio", + "gui_attributes": { + "hlist": "designation" + } + }, + { + "name": "hasTitle", + "super": [ + "hasLinkTo" + ], + "object": ":Titles", + "labels": { + "rm": "only Romansh" + }, + "comments": { + "en": "A strange chance put me in possession of this journal.", + "de": "Ein seltsamer Zufall brachte mich in den Besitz dieses Tagebuchs.", + "fr": "Un étrange hasard m'a mis en possession de ce journal.", + "it": "Uno strano caso mi mise in possesso di questo diario.", + "rm": "Rumantsch" + }, + "gui_element": "Searchbox" + }, + { + "name": "hasStatus", + "super": [ + "hasValue" + ], + "object": "ListValue", + "labels": { + "en": "status", + "fr": "statut", + "rm": "Rumantsch" + }, + "comments": { + "en": "only English" + }, + "gui_element": "List", + "gui_attributes": { + "hlist": "status" + } + }, + { + "name": "hasLifeYearAmount", + "super": [ + "hasValue" + ], + "object": "IntValue", + "labels": { + "en": "age", + "fr": "Âge", + "rm": "Rumantsch" + }, + "comments": { + "de": "only German" + }, + "gui_element": "Spinbox" + }, + { + "name": "hasBirthDate", + "super": [ + "hasValue" + ], + "object": "DateValue", + "labels": { + "en": "birth date", + "fr": "Date de naissance", + "rm": "Rumantsch" + }, + "comments": { + "fr": "only French" + }, + "gui_element": "Date" + }, + { + "name": 
"hasRepresentation", + "super": [ + "hasRepresentation" + ], + "object": "Representation", + "labels": { + "en": "has a multimedia file", + "de": "hat eine Multimediadatei" + }, + "comments": { + "rm": "only Rumantsch" + }, + "gui_element": "Searchbox" + }, + { + "name": "hasRemarks", + "super": [ + "hasValue", + "dcterms:description" + ], + "object": "TextValue", + "labels": { + "en": "remark", + "fr": "Commentaire", + "rm": "Rumantsch" + }, + "gui_element": "Textarea" + }, + { + "name": "hasTerminusPostQuem", + "super": [ + "hasValue" + ], + "object": "DateValue", + "labels": { + "en": "terminus post quem", + "fr": "terminus post quem", + "rm": "Rumantsch" + }, + "comments": { + "en": "I had already looked at several pieces of property when, one day, the notary, who had been giving me some necessary directions for one of my explorations, said to me:", + "de": "Ich hatte bereits mehrere Grundstücke besichtigt, als eines Tages der Notar, der mir die notwendigen Anweisungen für eine meiner Erkundungen gegeben hatte, zu mir sagte:", + "fr": "J'avais déjà examiné plusieurs propriétés quand, un jour, le notaire, qui me donnait des indications nécessaires pour une de mes explorations, me dit :", + "it": "Avevo già visto diverse proprietà quando un giorno il notaio,", + "rm": "Rumantsch" + }, + "gui_element": "Date" + }, + { + "name": "hasGND", + "super": [ + "hasValue" + ], + "object": "UriValue", + "labels": { + "en": "GND", + "de": "GND", + "fr": "GND", + "it": "GND", + "rm": "Rumantsch" + }, + "comments": { + "en": "Gemeinsame Normdatei", + "de": "Gemeinsame Normdatei", + "fr": "Gemeinsame Normdatei", + "it": "Gemeinsame Normdatei", + "rm": "Rumantsch" + }, + "gui_element": "SimpleText", + "gui_attributes": { + "size": 100 + } + }, + { + "name": "hasColor", + "super": [ + "hasColor" + ], + "object": "ColorValue", + "labels": { + "en": "Color", + "de": "Farbe", + "rm": "Rumantsch" + }, + "comments": { + "en": "Color", + "de": "Farbe" + }, + "gui_element": 
"Colorpicker" + }, + { + "name": "hasDecimal", + "super": [ + "hasValue" + ], + "object": "DecimalValue", + "labels": { + "en": "Decimal number", + "de": "Dezimalzahl", + "fr": "Chiffre décimale", + "rm": "Rumantsch" + }, + "comments": { + "en": "Decimal number", + "de": "Dezimalzahl", + "fr": "Chiffre décimale" + }, + "gui_element": "Slider", + "gui_attributes": { + "min": 0.0, + "max": 100.0 + } + }, + { + "name": "hasTime", + "super": [ + "hasValue" + ], + "object": "TimeValue", + "labels": { + "en": "Time", + "de": "Zeit", + "fr": "Temps", + "rm": "Rumantsch" + }, + "comments": { + "en": "Time", + "de": "Zeit", + "fr": "Temps" + }, + "gui_element": "TimeStamp" + }, + { + "name": "hasInterval", + "super": [ + "hasSequenceBounds" + ], + "object": "IntervalValue", + "labels": { + "en": "Time interval", + "de": "Zeitintervall" + }, + "comments": { + "en": "Time interval", + "de": "Zeitintervall" + }, + "gui_element": "Interval" + }, + { + "name": "hasBoolean", + "super": [ + "hasValue" + ], + "object": "BooleanValue", + "labels": { + "en": "Boolean value", + "de": "Bool'sche Variable" + }, + "comments": { + "en": "Boolean value", + "de": "Bool'sche Variable" + }, + "gui_element": "Checkbox" + }, + { + "name": "hasGeoname", + "super": [ + "hasValue" + ], + "object": "GeonameValue", + "labels": { + "en": "Geoname link", + "de": "Link zu Geonames" + }, + "comments": { + "en": "Geoname link", + "de": "Link zu Geonames" + }, + "gui_element": "Geonames" + }, + { + "name": "partOfDocument", + "super": [ + "isPartOf" + ], + "object": ":Documents", + "labels": { + "en": "is part of a document", + "de": "ist Teil eines Dokuments" + }, + "comments": { + "en": "is part of a document", + "de": "ist Teil eines Dokuments" + }, + "gui_element": "Searchbox" + } + ], + "resources": [ + { + "name": "Owner", + "super": [ + "Resource", + "dcterms:fantasy" + ], + "labels": { + "en": "Owner", + "de": "Eigentümer", + "fr": "Propriétaire", + "it": "Proprietario", + "rm": "Rumantsch" + }, + 
"comments": { + "en": "A strange chance put me in possession of this journal.", + "de": "Ein seltsamer Zufall brachte mich in den Besitz dieses Tagebuchs.", + "fr": "Un étrange hasard m'a mis en possession de ce journal.", + "it": "Uno strano caso mi mise in possesso di questo diario.", + "rm": "Rumantsch" + }, + "cardinalities": [ + { + "propname": ":hasAnthroponym", + "cardinality": "1", + "gui_order": 1 + }, + { + "propname": ":isOwnerOf", + "cardinality": "0-1", + "gui_order": 2 + }, + { + "propname": ":correspondsToGenericAnthroponym", + "cardinality": "0-n", + "gui_order": 3 + }, + { + "propname": ":hasAlias", + "cardinality": "1", + "gui_order": 4 + }, + { + "propname": ":hasGender", + "cardinality": "0-n", + "gui_order": 5 + }, + { + "propname": ":isDesignatedAs", + "cardinality": "0-1", + "gui_order": 6 + }, + { + "propname": ":hasTitle", + "cardinality": "1-n", + "gui_order": 7 + }, + { + "propname": ":hasStatus", + "cardinality": "0-1", + "gui_order": 8 + }, + { + "propname": ":hasFamilyRelationTo", + "cardinality": "1-n", + "gui_order": 9 + }, + { + "propname": ":hasLifeYearAmount", + "cardinality": "0-1", + "gui_order": 10 + }, + { + "propname": ":hasBirthDate", + "cardinality": "0-1", + "gui_order": 11 + }, + { + "propname": ":hasDeathDate", + "cardinality": "0-1", + "gui_order": 12 + }, + { + "propname": ":hasBibliography", + "cardinality": "1-n", + "gui_order": 13 + }, + { + "propname": ":hasRemarks", + "cardinality": "1-n", + "gui_order": 14 + } + ] + }, + { + "name": "Title", + "super": [ + "Resource" + ], + "labels": { + "en": "Title", + "de": "Titel", + "fr": "Titre", + "it": "Titolo", + "rm": "Rumantsch" + }, + "comments": { + "en": "Only English" + }, + "cardinalities": [ + { + "propname": ":isTitle", + "cardinality": "1", + "gui_order": 1 + }, + { + "propname": ":isTitleInEnglish", + "cardinality": "0-1", + "gui_order": 2 + }, + { + "propname": ":isTitleInFrench", + "cardinality": "0-1", + "gui_order": 3 + }, + { + "propname": 
":isTitleInGerman", + "cardinality": "0-1", + "gui_order": 4 + }, + { + "propname": ":isEquivalentTo", + "cardinality": "0-1", + "gui_order": 5 + }, + { + "propname": ":belongsToOccupationField", + "cardinality": "0-1", + "gui_order": 6 + }, + { + "propname": ":isGodRelatedTo", + "cardinality": "0-n", + "gui_order": 7 + }, + { + "propname": ":isPlaceRelatedTo", + "cardinality": "0-n", + "gui_order": 8 + }, + { + "propname": ":hasBibliography", + "cardinality": "0-n", + "gui_order": 9 + }, + { + "propname": ":hasRemarks", + "cardinality": "0-n", + "gui_order": 10 + } + ] + }, + { + "name": "GenericAnthroponym", + "super": [ + "Resource" + ], + "labels": { + "en": "Generic anthroponym", + "de": "Allgemeines Anthroponym", + "fr": "Anthroponyme générique", + "it": "Antroponimo generico", + "rm": "Rumantsch" + }, + "comments": { + "de": "Only German" + }, + "cardinalities": [ + { + "propname": ":isGenericAnthroponym", + "cardinality": "1", + "gui_order": 1 + }, + { + "propname": ":hasTMNameID", + "cardinality": "0-1", + "gui_order": 2 + }, + { + "propname": ":hasEgyptianVersion", + "cardinality": "0-1", + "gui_order": 3 + }, + { + "propname": ":hasGreekVersion", + "cardinality": "0-1", + "gui_order": 4 + }, + { + "propname": ":hasLatinVersion", + "cardinality": "0-1", + "gui_order": 5 + }, + { + "propname": ":hasCopticVersion", + "cardinality": "0-1", + "gui_order": 6 + }, + { + "propname": ":hasOtherVersion", + "cardinality": "0-n", + "gui_order": 7 + }, + { + "propname": ":hasGender", + "cardinality": "0-1", + "gui_order": 8 + }, + { + "propname": ":hasLinguisticOrigin", + "cardinality": "0-1", + "gui_order": 9 + }, + { + "propname": ":hasEnglishTranslation", + "cardinality": "0-1", + "gui_order": 10 + }, + { + "propname": ":hasFrenchTranslation", + "cardinality": "0-1", + "gui_order": 11 + }, + { + "propname": ":hasGermanTranslation", + "cardinality": "0-1", + "gui_order": 12 + }, + { + "propname": ":isGodRelatedTo", + "cardinality": "0-1", + "gui_order": 13 + }, + { 
+ "propname": ":isPlaceRelatedTo", + "cardinality": "0-1", + "gui_order": 14 + }, + { + "propname": ":hasAttestationAmount", + "cardinality": "0-1", + "gui_order": 15 + }, + { + "propname": ":isMostCommonIn", + "cardinality": "0-n", + "gui_order": 16 + }, + { + "propname": ":isMostWidespreadIn", + "cardinality": "0-n", + "gui_order": 17 + }, + { + "propname": ":hasTerminusPostQuem", + "cardinality": "0-1", + "gui_order": 18 + }, + { + "propname": ":hasTerminusAnteQuem", + "cardinality": "0-1", + "gui_order": 19 + }, + { + "propname": ":hasBibliography", + "cardinality": "0-n", + "gui_order": 20 + }, + { + "propname": ":hasRemarks", + "cardinality": "0-n", + "gui_order": 21 + } + ] + }, + { + "name": "FamilyMember", + "super": [ + "Resource" + ], + "labels": { + "en": "Family member", + "de": "Familienmitglied", + "fr": "Membre de la famille", + "it": "Membro della famiglia", + "rm": "Rumantsch" + }, + "comments": { + "fr": "Only French" + }, + "cardinalities": [ + { + "propname": ":hasAnthroponym", + "cardinality": "1", + "gui_order": 1 + }, + { + "propname": ":isRelatedTo", + "cardinality": "0-1", + "gui_order": 2 + }, + { + "propname": ":isRelatedAs", + "cardinality": "0-1", + "gui_order": 3 + }, + { + "propname": ":correspondsToGenericAnthroponym", + "cardinality": "0-1", + "gui_order": 4 + }, + { + "propname": ":hasAlias", + "cardinality": "0-1", + "gui_order": 5 + }, + { + "propname": ":hasGender", + "cardinality": "0-1", + "gui_order": 6 + }, + { + "propname": ":isDesignatedAs", + "cardinality": "0-1", + "gui_order": 7 + }, + { + "propname": ":hasTitle", + "cardinality": "1-n", + "gui_order": 8 + }, + { + "propname": ":hasStatus", + "cardinality": "0-1", + "gui_order": 9 + }, + { + "propname": ":hasFamilyRelationTo", + "cardinality": "1-n", + "gui_order": 10 + }, + { + "propname": ":hasLifeYearAmount", + "cardinality": "0-1", + "gui_order": 11 + }, + { + "propname": ":hasBirthDate", + "cardinality": "0-1", + "gui_order": 12 + }, + { + "propname": 
":hasDeathDate", + "cardinality": "0-1", + "gui_order": 13 + }, + { + "propname": ":hasBibliography", + "cardinality": "1-n", + "gui_order": 14 + }, + { + "propname": ":hasRemarks", + "cardinality": "1-n", + "gui_order": 15 + } + ] + }, + { + "name": "MentionedPerson", + "super": [ + "Resource" + ], + "labels": { + "en": "Mentioned person", + "de": "Erwähnte Person", + "fr": "Personne mentionnée", + "it": "Persona menzionata", + "rm": "Rumantsch" + }, + "comments": { + "it": "Only Italian" + }, + "cardinalities": [ + { + "propname": ":hasAnthroponym", + "cardinality": "1", + "gui_order": 1 + }, + { + "propname": ":isOwnerOf", + "cardinality": "0-1", + "gui_order": 2 + }, + { + "propname": ":correspondsToGenericAnthroponym", + "cardinality": "0-1", + "gui_order": 3 + }, + { + "propname": ":hasAlias", + "cardinality": "0-1", + "gui_order": 4 + }, + { + "propname": ":hasGender", + "cardinality": "0-1", + "gui_order": 5 + }, + { + "propname": ":isDesignatedAs", + "cardinality": "0-1", + "gui_order": 6 + }, + { + "propname": ":hasTitle", + "cardinality": "1-n", + "gui_order": 7 + }, + { + "propname": ":hasStatus", + "cardinality": "0-1", + "gui_order": 8 + }, + { + "propname": ":hasFamilyRelationTo", + "cardinality": "1-n", + "gui_order": 9 + }, + { + "propname": ":hasLifeYearAmount", + "cardinality": "0-1", + "gui_order": 10 + }, + { + "propname": ":hasBirthDate", + "cardinality": "0-1", + "gui_order": 11 + }, + { + "propname": ":hasDeathDate", + "cardinality": "0-1", + "gui_order": 12 + }, + { + "propname": ":hasBibliography", + "cardinality": "1-n", + "gui_order": 13 + }, + { + "propname": ":hasRemarks", + "cardinality": "1-n", + "gui_order": 14 + } + ] + }, + { + "name": "Alias", + "super": [ + "Resource" + ], + "labels": { + "en": "Alias", + "de": "Alias", + "fr": "Alias", + "it": "Alias", + "rm": "Rumantsch" + }, + "comments": { + "rm": "Only Romansh" + }, + "cardinalities": [ + { + "propname": ":isAlias", + "cardinality": "1", + "gui_order": 1 + }, + { + 
"propname": ":isAliasOf", + "cardinality": "0-1", + "gui_order": 2 + }, + { + "propname": ":correspondsToGenericAnthroponym", + "cardinality": "0-1", + "gui_order": 3 + }, + { + "propname": ":hasGender", + "cardinality": "0-1", + "gui_order": 4 + } + ] + }, + { + "name": "Image", + "super": [ + "StillImageRepresentation", + "dcterms:image" + ], + "labels": { + "en": "Only English" + }, + "comments": { + "en": "Image", + "de": "Bild" + }, + "cardinalities": [ + { + "propname": ":hasRemarks", + "cardinality": "1", + "gui_order": 1 + } + ] + }, + { + "name": "Video", + "super": [ + "MovingImageRepresentation" + ], + "labels": { + "de": "Only German" + }, + "comments": { + "en": "Video", + "de": "Video" + }, + "cardinalities": [ + { + "propname": ":hasRemarks", + "cardinality": "1", + "gui_order": 1 + } + ] + }, + { + "name": "Audio", + "super": [ + "AudioRepresentation" + ], + "labels": { + "fr": "Only French" + }, + "comments": { + "en": "Audio", + "de": "Audio" + }, + "cardinalities": [ + { + "propname": ":hasRemarks", + "cardinality": "1", + "gui_order": 1 + } + ] + }, + { + "name": "ZIP", + "super": [ + "ArchiveRepresentation" + ], + "labels": { + "it": "Only Italian" + }, + "comments": { + "en": "ZIP", + "de": "ZIP" + }, + "cardinalities": [ + { + "propname": ":hasRemarks", + "cardinality": "1", + "gui_order": 1 + } + ] + }, + { + "name": "PDFDocument", + "super": [ + "DocumentRepresentation" + ], + "labels": { + "rm": "Only Rumantsch" + }, + "comments": { + "en": "PDF Document", + "de": "PDF-Dokument" + }, + "cardinalities": [ + { + "propname": ":hasRemarks", + "cardinality": "1", + "gui_order": 1 + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/testdata/excel2json_files/lists/de.xlsx b/testdata/excel2json_files/lists/de.xlsx new file mode 100644 index 000000000..0ed08b1b3 Binary files /dev/null and b/testdata/excel2json_files/lists/de.xlsx differ diff --git a/testdata/excel2json_files/lists/en.xlsx 
b/testdata/excel2json_files/lists/en.xlsx new file mode 100644 index 000000000..2d275f436 Binary files /dev/null and b/testdata/excel2json_files/lists/en.xlsx differ diff --git a/testdata/excel2json_files/lists/fr.xlsx b/testdata/excel2json_files/lists/fr.xlsx new file mode 100644 index 000000000..0eecf4010 Binary files /dev/null and b/testdata/excel2json_files/lists/fr.xlsx differ diff --git a/testdata/excel2json_files/test-name (test_label)/properties.xlsx b/testdata/excel2json_files/test-name (test_label)/properties.xlsx new file mode 100644 index 000000000..3ca1d79f0 Binary files /dev/null and b/testdata/excel2json_files/test-name (test_label)/properties.xlsx differ diff --git a/testdata/excel2json_files/test-name (test_label)/resources.xlsx b/testdata/excel2json_files/test-name (test_label)/resources.xlsx new file mode 100644 index 000000000..8da7120b7 Binary files /dev/null and b/testdata/excel2json_files/test-name (test_label)/resources.xlsx differ