/
excel_to_json_resources.py
116 lines (96 loc) · 3.32 KB
/
excel_to_json_resources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import os
from typing import Any
import jsonschema
from openpyxl import load_workbook
from openpyxl.workbook.workbook import Workbook
def validate_resources_with_schema(json_file: str) -> bool:
"""
This function checks if the json resources are valid according to the schema.
Args:
json_file: the json with the resources to be validated
Returns:
True if the data passed validation, False otherwise
"""
current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, '../schemas/resources-only.json')) as schema:
resources_schema = json.load(schema)
try:
jsonschema.validate(instance=json_file, schema=resources_schema)
except jsonschema.exceptions.ValidationError as err:
print(err)
return False
print('Resource data passed schema validation.')
return True
def resources_excel2json(excelfile: str, outfile: str) -> None:
"""
Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology
Args:
excelfile: path to the Excel file containing the properties
outfile: path to the output JSON file containing the properties section for the ontology
Returns:
None
"""
# load file
wb = load_workbook(excelfile, read_only=True)
# get overview
sheet = wb['classes']
resource_list = [c for c in sheet.iter_rows(min_row=2, values_only=True)]
prefix = '"resources":'
resources = [_extract_row(res, wb) for res in resource_list]
if validate_resources_with_schema(json.loads(json.dumps(resources, indent=4))):
# write final list to JSON file if list passed validation
with open(file=outfile, mode='w+', encoding='utf-8') as file:
file.write(prefix)
json.dump(resources, file, indent=4)
print('Resource file was created successfully and written to file:', outfile)
else:
print('Resource data is not valid according to schema.')
def _extract_row(row: tuple[str, str, str, str, str, str, str, str, str, str], wb: Workbook) -> dict[str, Any]:
"""build a property dict from a row of the excel file"""
# get name
name = row[0]
# get labels
labels = {}
if row[1]:
labels['en'] = row[1]
if row[2]:
labels['de'] = row[2]
if row[3]:
labels['fr'] = row[3]
if row[4]:
labels['it'] = row[4]
# get comments
comments = {}
if row[5]:
comments['en'] = row[5]
if row[6]:
comments['de'] = row[6]
if row[7]:
comments['fr'] = row[7]
if row[8]:
comments['it'] = row[8]
# get super
sup = row[9]
# load details for this resource
sh = wb[name]
property_list = [c for c in sh.iter_rows(min_row=2, values_only=True)]
cards = []
# for each of the detail sheets
for i, prop in enumerate(property_list):
# get name and cardinality.
# GUI-order is equal to order in the sheet.
property_ = {
"propname": ":" + prop[0],
"cardinality": str(prop[1]),
"gui_order": i + 1
}
cards.append(property_)
# return resource dict
return {
"name": name,
"labels": labels,
"comments": comments,
"super": sup,
"cardinalities": cards
}