diff --git a/MANIFEST.in b/MANIFEST.in index d97f0504b..03db2abdb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,5 +3,6 @@ include knora/dsplib/utils/knora-schema.json include knora/dsplib/utils/knora-schema-lists.json include knora/dsplib/utils/knora-schema-lists-only.json include knora/dsplib/utils/knora-schema-resources-only.json +include knora/dsplib/utils/knora-schema-properties-only.json include knora/dsplib/utils/knora-data-schema.xsd include knora/dsplib/utils/language-codes-3b2_csv.csv diff --git a/docs/assets/images/img-properties-example.png b/docs/assets/images/img-properties-example.png new file mode 100644 index 000000000..43bcc20de Binary files /dev/null and b/docs/assets/images/img-properties-example.png differ diff --git a/docs/dsp-tools-excel.md b/docs/dsp-tools-excel.md index a5f106c83..434e073fa 100644 --- a/docs/dsp-tools-excel.md +++ b/docs/dsp-tools-excel.md @@ -25,7 +25,23 @@ For further information about resources, see [here](./dsp-tools-create.md#resour ## Create the properties for a data model from an Excel file -[not yet implemented] +With dsp-tools the `properties` section used in a data model (JSON) can be created from an Excel file. Only the first worksheet of +the Excel file is considered and only XLSX files are allowed. The `properties` section can be inserted into the ontology file and +then be uploaded onto a DSP server. 
+ +The Excel sheet must have the following format: +![img-properties-example.png](assets/images/img-properties-example.png) + +The expected columns are: + +- `name` : The name of the property +- `super` : The base property of the property +- `object` : The value type of the property (e.g. `TextValue`) or, if it is a link property (property derived from `hasLinkTo`), the resource the property refers to +- `en`, `de`, `fr`, `it` : The labels of the property in different languages, at least one language has to be provided +- `gui_element` : The GUI element for the property +- `hlist` : For list values, the corresponding list + +For further information about properties, see [here](./dsp-tools-create.md#properties). ## Create a DSP-conform XML file from an Excel file diff --git a/docs/dsp-tools-usage.md b/docs/dsp-tools-usage.md index 23b9dae3b..d3a35a68f 100644 --- a/docs/dsp-tools-usage.md +++ b/docs/dsp-tools-usage.md @@ -125,10 +125,11 @@ the usage of this command can be found [here](./dsp-tools-excel.md#create-a-list dsp-tools excel2resources excel_file.xlsx output_file.json ``` -The command is used to create the resource section of an ontology from an Excel file. Therefore, an Excel file has to be provided +The command is used to create the resources section of an ontology from an Excel file. Therefore, an Excel file has to be provided with the data in the first worksheet of the Excel file. -The following example shows how to create the resources section from an Excel file called `Resources.xlsx`. +The following example shows how to create the resources section from an Excel file called `Resources.xlsx`. The output is written +to a file called `resources.json`. ```bash dsp-tools excel2resources Resources.xlsx resources.json @@ -136,4 +137,24 @@ dsp-tools excel2resources Resources.xlsx resources.json More information about the usage of this command can be found [here](./dsp-tools-excel.md#create-the-resources-for-a-data-model-from-an-excel-file) +. 
+ +## Create properties from an Excel file + +```bash +dsp-tools excel2properties excel_file.xlsx output_file.json +``` + +The command is used to create the properties section of an ontology from an Excel file. Therefore, an Excel file has to be +provided with the data in the first worksheet of the Excel file. + +The following example shows how to create the properties section from an Excel file called `Properties.xlsx`. The output is +written to a file called `properties.json`. + +```bash +dsp-tools excel2properties Properties.xlsx properties.json +``` + +More information about the usage of this command can be found +[here](./dsp-tools-excel.md#create-the-properties-for-a-data-model-from-an-excel-file) . \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 682e6938b..3670cf087 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,5 +24,9 @@ dsp-tools helps you with the following tasks: creates a JSON or XML file from one or several Excel files. The created data can either be integrated into an ontology or be uploaded directly to a DSP server with `dsp-tools create`. - [`dsp-tools excel2resources`](./dsp-tools-usage.md#create-resources-from-an-excel-file) - creates the ontology's resource section from an Excel file. The resources can be integrated into an ontology and then be + creates the ontology's resource section from an Excel file. The resulting section can be integrated into an ontology and then be uploaded to a DSP server with `dsp-tools create`. +- [`dsp-tools excel2properties`](./dsp-tools-usage.md#create-properties-from-an-excel-file) + creates the ontology's properties section from an Excel file. The resulting section can be integrated into an ontology and then + be uploaded to a DSP server with `dsp-tools create`. 
+ diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py index df0617b99..ac7e08275 100644 --- a/knora/dsp_tools.py +++ b/knora/dsp_tools.py @@ -15,6 +15,7 @@ from dsplib.utils.onto_get import get_ontology from dsplib.utils.excel_to_json_lists import list_excel2json, validate_list_with_schema from dsplib.utils.excel_to_json_resources import resources_excel2json +from dsplib.utils.excel_to_json_properties import properties_excel2json from dsplib.utils.onto_validate import validate_ontology from dsplib.utils.xml_upload import xml_upload @@ -93,6 +94,14 @@ def program(args: list) -> None: parser_excel_resources.add_argument('outfile', help='Path to the output JSON file containing the resource data', default='resources.json') + parser_excel_properties = subparsers.add_parser('excel2properties', help='Create a JSON file from an Excel file containing ' + 'properties for a DSP ontology. ') + parser_excel_properties.set_defaults(action='excel2properties') + parser_excel_properties.add_argument('excelfile', help='Path to the Excel file containing the properties', + default='properties.xlsx') + parser_excel_properties.add_argument('outfile', help='Path to the output JSON file containing the properties data', + default='properties.json') + args = parser.parse_args(args) if not hasattr(args, 'action'): @@ -145,6 +154,9 @@ def program(args: list) -> None: elif args.action == 'excel2resources': resources_excel2json(excelfile=args.excelfile, outfile=args.outfile) + elif args.action == 'excel2properties': + properties_excel2json(excelfile=args.excelfile, + outfile=args.outfile) def main(): diff --git a/knora/dsplib/utils/BUILD.bazel b/knora/dsplib/utils/BUILD.bazel index 874eb98b9..e057a2abe 100644 --- a/knora/dsplib/utils/BUILD.bazel +++ b/knora/dsplib/utils/BUILD.bazel @@ -24,6 +24,16 @@ py_library( ] ) +py_library( + name = "excel_to_json_properties", + visibility = ["//visibility:public"], + srcs = ["excel_to_json_properties.py"], + deps = [ + requirement("jsonschema"), + 
requirement("openpyxl")
+    ]
+)
+
 py_library(
     name = "expand_all_lists",
     visibility = ["//visibility:public"],
diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py
new file mode 100644
index 000000000..7fd2c2b25
--- /dev/null
+++ b/knora/dsplib/utils/excel_to_json_properties.py
@@ -0,0 +1,112 @@
+import json
+import os
+
+import jsonschema
+from openpyxl import load_workbook
+
+
+def validate_properties_with_schema(json_file: list) -> bool:
+    """
+    Checks whether properties are valid according to the schema 'knora-schema-properties-only.json'.
+
+    The schema is read from the directory this module is located in. A validation error is printed and reported
+    through the return value, it is not raised.
+
+    Args:
+        json_file: the parsed properties (a list of property dicts) to be validated; despite its name this is the
+            data itself, not a path to a file
+
+    Returns:
+        True if the data passed validation, False otherwise
+    """
+    current_dir = os.path.dirname(os.path.realpath(__file__))
+    schema_path = os.path.join(current_dir, 'knora-schema-properties-only.json')
+    # JSON files are UTF-8 encoded, so do not rely on the platform's default encoding
+    with open(schema_path, encoding='utf-8') as schema:
+        properties_schema = json.load(schema)
+
+    try:
+        jsonschema.validate(instance=json_file, schema=properties_schema)
+    except jsonschema.exceptions.ValidationError as err:
+        print(err)
+        return False
+    print('Properties data passed schema validation.')
+    return True
+
+
+def properties_excel2json(excelfile: str, outfile: str) -> list:
+    """
+    Converts properties described in an Excel file into a properties section which can be integrated into a DSP
+    ontology.
+
+    Only the first worksheet is read. Its first row is skipped as header and rows without any content are ignored.
+    The output file is only written if the properties pass the schema validation. It consists of the text
+    '"properties":' followed by the JSON array, i.e. it is a fragment to be inserted into an ontology file and not a
+    standalone JSON document.
+
+    Args:
+        excelfile: path to the Excel file containing the properties
+        outfile: path to the output JSON file containing the properties section for the ontology
+
+    Returns:
+        The list of properties read from the Excel file, whether or not it passed the validation
+
+    Raises:
+        ValueError: if a row with content has no label in any of the four languages
+    """
+    # a workbook loaded in read-only mode keeps the file open until it is closed explicitly
+    wb = load_workbook(filename=excelfile, read_only=True)
+    try:
+        sheet = wb.worksheets[0]
+        props = [
+            row_to_prop(row)
+            for row in sheet.iter_rows(min_row=2, values_only=True, max_col=9)
+            # sheets often end with rows that have no content, they do not describe a property
+            if any(cell is not None for cell in row)
+        ]
+    finally:
+        wb.close()
+
+    prefix = '"properties":'
+
+    # jsonschema validates Python lists and dicts directly, a JSON round trip is not needed
+    if validate_properties_with_schema(props):
+        # write final list to JSON file if list passed validation
+        with open(file=outfile, mode='w', encoding='utf-8') as file:
+            file.write(prefix)
+            json.dump(props, file, indent=4)
+        print('Properties file was created successfully and written to file:', outfile)
+    else:
+        print('Properties data is not valid according to schema.')
+
+    return props
+
+
+def row_to_prop(row) -> dict:
+    """
+    Parses the row of an Excel sheet and makes a property from it.
+
+    Args:
+        row: the nine cell values of a row in the order name, super, object, en, de, fr, it, gui_element, hlist
+
+    Returns:
+        The property as dict that can be serialised to JSON. The key 'gui_attributes' is only set if an hlist is given.
+
+    Raises:
+        ValueError: if no label is given in any of the four languages
+    """
+    name, super_, object_, en, de, fr, it, gui_element, hlist = row
+    # only languages for which a label was entered become part of the property
+    labels = {lang: label for lang, label in (('en', en), ('de', de), ('fr', fr), ('it', it)) if label}
+    if not labels:
+        raise ValueError(f"No label given in any of the four languages: {name}")
+    prop = {
+        'name': name,
+        'super': [super_],
+        'object': object_,
+        'labels': labels,
+        'gui_element': gui_element
+    }
+    if hlist:
+        prop['gui_attributes'] = {'hlist': hlist}
+    return prop
diff --git a/knora/dsplib/utils/knora-schema-properties-only.json b/knora/dsplib/utils/knora-schema-properties-only.json
new file mode 100644
index 000000000..a41788cee
--- /dev/null
+++ b/knora/dsplib/utils/knora-schema-properties-only.json
@@ -0,0 +1,179 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "http://knora.org/pyknora/ontology/knora-schema-properties-only.json",
+  "title": "Knora JSON schema for properties only",
+  "description": "JSON schema for properties used in Knora ontologies",
+  "definitions": {
+    "langstring": {
+      "anyOf": [
+        {
+          "type": "object",
+          "patternProperties": {
+            "^(en|de|fr|it)$": {
+              "type": "string"
+            }
+          },
+          "additionalProperties": false
+        },
+        {
+          "type": "string"
+        }
+      ]
+    },
+    "label": {
+      "$ref": "#/definitions/langstring"
+    },
+    "comment": {
+      "$ref": "#/definitions/langstring"
+    },
+    "property": {
+      "type": "object",
+      "properties": {
+        "name": {
+          "type": "string"
+        },
+        "super": {
+          "type": "array",
+          "items": {
+            "type": "string",
+            "oneOf": [
+              {
+                "enum": [
+                  "hasValue",
+                  "hasLinkTo",
+                  "hasColor",
+                  "hasComment",
+                  "hasGeometry",
+
"isPartOf", + "isRegionOf", + "isAnnotationOf", + "hasRepresentation", + "seqnum" + ] + }, + { + "pattern": "^([\\w-]+)?:(\\w+)$" + }, + { + "pattern": "^(http)(s)?://.*" + } + ] + } + }, + "object": { + "type": "string", + "oneOf": [ + { + "enum": [ + "TextValue", + "ColorValue", + "DateValue", + "DecimalValue", + "GeomValue", + "GeonameValue", + "IntValue", + "BooleanValue", + "TimeValue", + "UriValue", + "IntervalValue", + "ListValue", + "Region", + "Resource", + "Annotation" + ] + }, + { + "pattern": "^([\\w-]+)?:(\\w+)$" + } + ] + }, + "subject": { + "type": "string", + "pattern": "^([\\w-]+)?:(\\w+)$" + }, + "labels": { + "$ref": "#/definitions/label" + }, + "gui_element": { + "type": "string", + "enum": [ + "Colorpicker", + "Date", + "Geometry", + "Geonames", + "Interval", + "TimeStamp", + "List", + "Pulldown", + "Radio", + "Richtext", + "Searchbox", + "SimpleText", + "Slider", + "Spinbox", + "Textarea", + "Checkbox", + "Fileupload" + ] + }, + "gui_attributes": { + "type": "object", + "properties": { + "size": { + "type": "integer" + }, + "maxsize": { + "type": "integer" + }, + "hlist": { + "type": "string" + }, + "numprops": { + "type": "integer" + }, + "ncolors": { + "type": "integer" + }, + "cols": { + "type": "integer" + }, + "rows": { + "type": "integer" + }, + "width": { + "type": "string", + "pattern": "^[0-9]*%?$" + }, + "wrap": { + "type": "string", + "enum": [ + "soft", + "hard" + ] + }, + "max": { + "type": "number" + }, + "min": { + "type": "number" + } + } + }, + "comments": { + "$ref": "#/definitions/comment" + } + }, + "required": [ + "name", + "object", + "labels", + "gui_element" + ], + "additionalProperties": false + } + }, + "type": "array", + "items": { + "$ref": "#/definitions/property" + } +} diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 15d7c23d4..3d00a94b5 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -133,7 +133,8 @@ py_test( "//knora/dsplib/utils:onto_create_ontology", "//knora/dsplib/utils:xml_upload", 
"//knora/dsplib/utils:excel_to_json_lists", - "//knora/dsplib/utils:excel_to_json_resources" + "//knora/dsplib/utils:excel_to_json_resources", + "//knora/dsplib/utils:excel_to_json_properties" ], data = [ "//testdata:testdata", diff --git a/test/test_tools.py b/test/test_tools.py index 574ed126b..7482243ed 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -4,6 +4,7 @@ from knora.dsplib.utils import excel_to_json_lists from knora.dsplib.utils.excel_to_json_lists import list_excel2json +from knora.dsplib.utils.excel_to_json_properties import properties_excel2json from knora.dsplib.utils.excel_to_json_resources import resources_excel2json from knora.dsplib.utils.onto_create_ontology import create_ontology from knora.dsplib.utils.onto_get import get_ontology @@ -87,6 +88,10 @@ def test_excel_to_json_resources(self): resources_excel2json(excelfile='testdata/Resources.xlsx', outfile='_resources-out.json') + def test_excel_to_json_properties(self): + properties_excel2json(excelfile='testdata/Properties.xlsx', + outfile='_properties-out.json') + def test_validate_ontology(self): validate_ontology('testdata/test-onto.json') diff --git a/testdata/BUILD.bazel b/testdata/BUILD.bazel index 9c6edfd52..51adcc7d6 100644 --- a/testdata/BUILD.bazel +++ b/testdata/BUILD.bazel @@ -10,6 +10,7 @@ filegroup( srcs = [ "anything-onto.json", "Resources.xlsx", + "Properties.xlsx", "lists/description_en.xlsx", "lists/Beschreibung_de.xlsx", "test-data.xml", diff --git a/testdata/Properties.xlsx b/testdata/Properties.xlsx new file mode 100644 index 000000000..0212d9d19 Binary files /dev/null and b/testdata/Properties.xlsx differ