diff --git a/.github/workflows/ckeck-pr-title.yml b/.github/workflows/check-pr-title.yml similarity index 84% rename from .github/workflows/ckeck-pr-title.yml rename to .github/workflows/check-pr-title.yml index e52642737..04673048a 100644 --- a/.github/workflows/ckeck-pr-title.yml +++ b/.github/workflows/check-pr-title.yml @@ -12,7 +12,7 @@ jobs: # check PR title - uses: deepakputhraya/action-pr-title@master with: - regex: '([a-z])+(\(([a-z\-_ ])+\))?!?: [a-z]([a-zA-Z-\.\d \(\)\[\]#_,])+$' # Regex the title should match. + regex: '([a-z])+(\(([0-9a-z\-_, ])+\))?!?: [a-z]([a-zA-Z-\.\d \(\)\[\]#_,])+$' # Regex the title should match. allowed_prefixes: "fix,refactor,feat,docs,chore,style,test" # title should start with the given prefix disallowed_prefixes: "feature,hotfix" # title should not start with the given prefix prefix_case_sensitive: true # title prefix are case insensitive diff --git a/MANIFEST.in b/MANIFEST.in index ceded8fc5..1bd2d0cf5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,4 +4,3 @@ include knora/dsplib/schemas/lists-only.json include knora/dsplib/schemas/resources-only.json include knora/dsplib/schemas/properties-only.json include knora/dsplib/schemas/data.xsd -include knora/dsplib/utils/language-codes-3b2_csv.csv diff --git a/Pipfile b/Pipfile index 6d0394e2e..8a79896ca 100644 --- a/Pipfile +++ b/Pipfile @@ -17,6 +17,7 @@ pystrict = "*" openpyxl = "*" pyparsing = "==2.4.7" networkx = "*" +pandas = "*" [dev-packages] mkdocs = "*" @@ -28,7 +29,6 @@ setuptools = "*" wheel = "*" pipenv-setup = "*" pytest = "*" -pandas = "*" [requires] python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock index 74c81620b..a19f44074 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "cc82ff2fd365554f59380994a1a47b2c0468899dc8dcdd99e7a9cc6c9dd82338" + "sha256": "2e734068322783409c232ff7c26d4771f37342afece91bf3edaa4e4ddaba409a" }, "pipfile-spec": 6, "requires": { @@ -181,6 +181,34 @@ "index": "pypi", "version": "==2.8.4" }, + "numpy": { + "hashes": [ + "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207", + "sha256:1ce7ab2053e36c0a71e7a13a7475bd3b1f54750b4b433adc96313e127b870887", + "sha256:2d487e06ecbf1dc2f18e7efce82ded4f705f4bd0cd02677ffccfb39e5c284c7e", + "sha256:37431a77ceb9307c28382c9773da9f306435135fae6b80b62a11c53cfedd8802", + "sha256:3e1ffa4748168e1cc8d3cde93f006fe92b5421396221a02f2274aab6ac83b077", + "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af", + "sha256:43a8ca7391b626b4c4fe20aefe79fec683279e31e7c79716863b4b25021e0e74", + "sha256:4c6036521f11a731ce0648f10c18ae66d7143865f19f7299943c985cdc95afb5", + "sha256:59d55e634968b8f77d3fd674a3cf0b96e85147cd6556ec64ade018f27e9479e1", + "sha256:64f56fc53a2d18b1924abd15745e30d82a5782b2cab3429aceecc6875bd5add0", + "sha256:7228ad13744f63575b3a972d7ee4fd61815b2879998e70930d4ccf9ec721dce0", + "sha256:9ce7df0abeabe7fbd8ccbf343dc0db72f68549856b863ae3dd580255d009648e", + "sha256:a911e317e8c826ea632205e63ed8507e0dc877dcdc49744584dfc363df9ca08c", + "sha256:b89bf9b94b3d624e7bb480344e91f68c1c6c75f026ed6755955117de00917a7c", + "sha256:ba9ead61dfb5d971d77b6c131a9dbee62294a932bf6a356e48c75ae684e635b3", + "sha256:c1d937820db6e43bec43e8d016b9b3165dcb42892ea9f106c70fb13d430ffe72", + "sha256:cc7f00008eb7d3f2489fca6f334ec19ca63e31371be28fd5dad955b16ec285bd", + "sha256:d4c5d5eb2ec8da0b4f50c9a843393971f31f1d60be87e0fb0917a49133d257d6", + "sha256:e96d7f3096a36c8754207ab89d4b3282ba7b49ea140e4973591852c77d09eb76", + "sha256:f0725df166cf4785c0bc4cbfb320203182b1ecd30fee6e541c8752a92df6aa32", + "sha256:f3eb268dbd5cfaffd9448113539e44e2dd1c5ca9ce25576f7c04a5453edc26fa", + "sha256:fb7a980c81dd932381f8228a426df8aeb70d59bbcda2af075b627bbc50207cba" + ], + "markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'", + "version": "==1.22.4" + }, "openpyxl": { "hashes": [ "sha256:0ab6d25d01799f97a9464630abacbb34aafecdcaa0ef3cba6d6b3499867d0355", @@ -189,6 +217,33 @@ "index": "pypi", "version": "==3.0.10" }, + "pandas": { + "hashes": [ + "sha256:0010771bd9223f7afe5f051eb47c4a49534345dfa144f2f5470b27189a4dd3b5", + "sha256:061609334a8182ab500a90fe66d46f6f387de62d3a9cb9aa7e62e3146c712167", + "sha256:09d8be7dd9e1c4c98224c4dfe8abd60d145d934e9fc1f5f411266308ae683e6a", + "sha256:295872bf1a09758aba199992c3ecde455f01caf32266d50abc1a073e828a7b9d", + "sha256:3228198333dd13c90b6434ddf61aa6d57deaca98cf7b654f4ad68a2db84f8cfe", + "sha256:385c52e85aaa8ea6a4c600a9b2821181a51f8be0aee3af6f2dcb41dafc4fc1d0", + "sha256:51649ef604a945f781105a6d2ecf88db7da0f4868ac5d45c51cb66081c4d9c73", + "sha256:5586cc95692564b441f4747c47c8a9746792e87b40a4680a2feb7794defb1ce3", + "sha256:5a206afa84ed20e07603f50d22b5f0db3fb556486d8c2462d8bc364831a4b417", + "sha256:5b79af3a69e5175c6fa7b4e046b21a646c8b74e92c6581a9d825687d92071b51", + "sha256:5c54ea4ef3823108cd4ec7fb27ccba4c3a775e0f83e39c5e17f5094cb17748bc", + "sha256:8c5bf555b6b0075294b73965adaafb39cf71c312e38c5935c93d78f41c19828a", + "sha256:92bc1fc585f1463ca827b45535957815b7deb218c549b7c18402c322c7549a12", + "sha256:95c1e422ced0199cf4a34385ff124b69412c4bc912011ce895582bee620dfcaa", + "sha256:b8134651258bce418cb79c71adeff0a44090c98d955f6953168ba16cc285d9f7", + "sha256:be67c782c4f1b1f24c2f16a157e12c2693fd510f8df18e3287c77f33d124ed07", + "sha256:c072c7f06b9242c855ed8021ff970c0e8f8b10b35e2640c657d2a541c5950f59", + "sha256:d0d4f13e4be7ce89d7057a786023c461dd9370040bdb5efa0a7fe76b556867a0", + "sha256:df82739e00bb6daf4bba4479a40f38c718b598a84654cbd8bb498fd6b0aa8c16", + "sha256:f549097993744ff8c41b5e8f2f0d3cbfaabe89b4ae32c8c08ead6cc535b80139", + "sha256:ff08a14ef21d94cdf18eef7c569d66f2e24e0bc89350bcd7d243dd804e3b5eb2" + ], + "index": "pypi", + "version": "==1.4.2" + }, "ply": { "hashes": [ "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", @@ -239,6 +294,21 @@ "index": "pypi", "version": "==1.2" }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "pytz": { + "hashes": [ + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" + ], + "version": "==2022.1" + }, "rdflib": { "hashes": [ "sha256:8dbfa0af2990b98471dacbc936d6494c997ede92fd8ed693fb84ee700ef6f754", @@ -265,11 +335,11 @@ }, "setuptools": { "hashes": [ - "sha256:5a844ad6e190dccc67d6d7411d119c5152ce01f7c76be4d8a1eaa314501bba77", - "sha256:bf8a748ac98b09d32c9a64a995a6b25921c96cc5743c1efa82763ba80ff54e91" + "sha256:990a4f7861b31532871ab72331e755b5f14efbe52d336ea7f6118144dd478741", + "sha256:c1848f654aea2e3526d17fc3ce6aeaa5e7e24e66e645b5be2171f3f6b4e5a178" ], "markers": "python_version >= '3.7'", - "version": "==62.4.0" + "version": "==62.6.0" }, "six": { "hashes": [ @@ -540,34 +610,6 @@ ], "version": "==0.4.3" }, - "numpy": { - "hashes": [ - "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207", - "sha256:1ce7ab2053e36c0a71e7a13a7475bd3b1f54750b4b433adc96313e127b870887", - "sha256:2d487e06ecbf1dc2f18e7efce82ded4f705f4bd0cd02677ffccfb39e5c284c7e", - "sha256:37431a77ceb9307c28382c9773da9f306435135fae6b80b62a11c53cfedd8802", - "sha256:3e1ffa4748168e1cc8d3cde93f006fe92b5421396221a02f2274aab6ac83b077", - "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af", - "sha256:43a8ca7391b626b4c4fe20aefe79fec683279e31e7c79716863b4b25021e0e74", - "sha256:4c6036521f11a731ce0648f10c18ae66d7143865f19f7299943c985cdc95afb5", - "sha256:59d55e634968b8f77d3fd674a3cf0b96e85147cd6556ec64ade018f27e9479e1", - "sha256:64f56fc53a2d18b1924abd15745e30d82a5782b2cab3429aceecc6875bd5add0", - "sha256:7228ad13744f63575b3a972d7ee4fd61815b2879998e70930d4ccf9ec721dce0", - "sha256:9ce7df0abeabe7fbd8ccbf343dc0db72f68549856b863ae3dd580255d009648e", - "sha256:a911e317e8c826ea632205e63ed8507e0dc877dcdc49744584dfc363df9ca08c", - "sha256:b89bf9b94b3d624e7bb480344e91f68c1c6c75f026ed6755955117de00917a7c", - "sha256:ba9ead61dfb5d971d77b6c131a9dbee62294a932bf6a356e48c75ae684e635b3", - "sha256:c1d937820db6e43bec43e8d016b9b3165dcb42892ea9f106c70fb13d430ffe72", - "sha256:cc7f00008eb7d3f2489fca6f334ec19ca63e31371be28fd5dad955b16ec285bd", - "sha256:d4c5d5eb2ec8da0b4f50c9a843393971f31f1d60be87e0fb0917a49133d257d6", - "sha256:e96d7f3096a36c8754207ab89d4b3282ba7b49ea140e4973591852c77d09eb76", - "sha256:f0725df166cf4785c0bc4cbfb320203182b1ecd30fee6e541c8752a92df6aa32", - "sha256:f3eb268dbd5cfaffd9448113539e44e2dd1c5ca9ce25576f7c04a5453edc26fa", - "sha256:fb7a980c81dd932381f8228a426df8aeb70d59bbcda2af075b627bbc50207cba" - ], - "markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'", - "version": "==1.22.4" - }, "orderedmultidict": { "hashes": [ "sha256:04070bbb5e87291cc9bfa51df413677faf2141c73c61d2a5f7b26bea3cd882ad", @@ -583,33 +625,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==20.9" }, - "pandas": { - "hashes": [ - "sha256:0010771bd9223f7afe5f051eb47c4a49534345dfa144f2f5470b27189a4dd3b5", - "sha256:061609334a8182ab500a90fe66d46f6f387de62d3a9cb9aa7e62e3146c712167", - "sha256:09d8be7dd9e1c4c98224c4dfe8abd60d145d934e9fc1f5f411266308ae683e6a", - "sha256:295872bf1a09758aba199992c3ecde455f01caf32266d50abc1a073e828a7b9d", - "sha256:3228198333dd13c90b6434ddf61aa6d57deaca98cf7b654f4ad68a2db84f8cfe", - "sha256:385c52e85aaa8ea6a4c600a9b2821181a51f8be0aee3af6f2dcb41dafc4fc1d0", - "sha256:51649ef604a945f781105a6d2ecf88db7da0f4868ac5d45c51cb66081c4d9c73", - "sha256:5586cc95692564b441f4747c47c8a9746792e87b40a4680a2feb7794defb1ce3", - "sha256:5a206afa84ed20e07603f50d22b5f0db3fb556486d8c2462d8bc364831a4b417", - "sha256:5b79af3a69e5175c6fa7b4e046b21a646c8b74e92c6581a9d825687d92071b51", - "sha256:5c54ea4ef3823108cd4ec7fb27ccba4c3a775e0f83e39c5e17f5094cb17748bc", - "sha256:8c5bf555b6b0075294b73965adaafb39cf71c312e38c5935c93d78f41c19828a", - "sha256:92bc1fc585f1463ca827b45535957815b7deb218c549b7c18402c322c7549a12", - "sha256:95c1e422ced0199cf4a34385ff124b69412c4bc912011ce895582bee620dfcaa", - "sha256:b8134651258bce418cb79c71adeff0a44090c98d955f6953168ba16cc285d9f7", - "sha256:be67c782c4f1b1f24c2f16a157e12c2693fd510f8df18e3287c77f33d124ed07", - "sha256:c072c7f06b9242c855ed8021ff970c0e8f8b10b35e2640c657d2a541c5950f59", - "sha256:d0d4f13e4be7ce89d7057a786023c461dd9370040bdb5efa0a7fe76b556867a0", - "sha256:df82739e00bb6daf4bba4479a40f38c718b598a84654cbd8bb498fd6b0aa8c16", - "sha256:f549097993744ff8c41b5e8f2f0d3cbfaabe89b4ae32c8c08ead6cc535b80139", - "sha256:ff08a14ef21d94cdf18eef7c569d66f2e24e0bc89350bcd7d243dd804e3b5eb2" - ], - "index": "pypi", - "version": "==1.4.2" - }, "pep517": { "hashes": [ "sha256:931378d93d11b298cf511dd634cf5ea4cb249a28ef84160b3247ee9afb4e8ab0", @@ -730,13 +745,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, - "pytz": { - "hashes": [ - "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", - "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" - ], - "version": "==2022.1" - }, "pyyaml": { "hashes": [ "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", @@ -802,11 +810,11 @@ }, "setuptools": { "hashes": [ - "sha256:5a844ad6e190dccc67d6d7411d119c5152ce01f7c76be4d8a1eaa314501bba77", - "sha256:bf8a748ac98b09d32c9a64a995a6b25921c96cc5743c1efa82763ba80ff54e91" + "sha256:990a4f7861b31532871ab72331e755b5f14efbe52d336ea7f6118144dd478741", + "sha256:c1848f654aea2e3526d17fc3ce6aeaa5e7e24e66e645b5be2171f3f6b4e5a178" ], "markers": "python_version >= '3.7'", - "version": "==62.4.0" + "version": "==62.6.0" }, "six": { "hashes": [ @@ -829,7 +837,7 @@ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.11'", "version": "==2.0.1" }, "tomlkit": { @@ -837,7 +845,7 @@ "sha256:0f4050db66fd445b885778900ce4dd9aea8c90c4721141fde0d6ade893820ef1", "sha256:71ceb10c0eefd8b8f11fe34e8a51ad07812cb1dc3de23247425fbc9ddc47b9dd" ], - "markers": "python_version >= '3.6' and python_version < '4.0'", + "markers": "python_version >= '3.6' and python_version < '4'", "version": "==0.11.0" }, "typing-extensions": { diff --git a/dev-requirements.txt b/dev-requirements.txt index 60cc1fd8b..2cd532872 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -23,10 +23,8 @@ mkdocs-material~=8.3.6 mkdocs-material-extensions~=1.0.3 mypy~=0.961 mypy-extensions~=0.4.3 -numpy~=1.22.4 orderedmultidict~=1.0.1 packaging~=20.9 -pandas~=1.4.2 pep517~=0.12.0 pip~=22.1.2 pip-shims~=0.7.0 @@ -42,12 +40,11 @@ pymdown-extensions~=9.5 pyparsing~=2.4.7 pytest~=7.1.2 python-dateutil~=2.8.2 -pytz~=2022.1 pyyaml~=6.0 pyyaml-env-tag~=0.1 requests~=2.28.0 requirementslib~=1.6.4 -setuptools~=62.4.0 +setuptools~=62.6.0 six~=1.16.0 toml~=0.10.2 tomli~=2.0.1 diff --git a/docs/assets/images/img-properties-example.png b/docs/assets/images/img-properties-example.png index 5d73cbf9e..d7a7378c9 100644 Binary files a/docs/assets/images/img-properties-example.png and b/docs/assets/images/img-properties-example.png differ diff --git a/docs/assets/images/img-resources-example-1.png b/docs/assets/images/img-resources-example-1.png index 0ef19c91c..ab00f49a6 100644 Binary files a/docs/assets/images/img-resources-example-1.png and b/docs/assets/images/img-resources-example-1.png differ diff --git a/docs/assets/templates/properties_template.xlsx b/docs/assets/templates/properties_template.xlsx index 609b377e8..da85cc6bd 100644 Binary files a/docs/assets/templates/properties_template.xlsx and b/docs/assets/templates/properties_template.xlsx differ diff --git a/docs/assets/templates/resources_template.xlsx b/docs/assets/templates/resources_template.xlsx index fda7a1805..633cdc800 100644 Binary files a/docs/assets/templates/resources_template.xlsx and b/docs/assets/templates/resources_template.xlsx differ diff --git a/docs/dsp-tools-excel.md b/docs/dsp-tools-excel.md index 20a3d2ce0..4e08492b9 100644 --- a/docs/dsp-tools-excel.md +++ b/docs/dsp-tools-excel.md @@ -20,25 +20,25 @@ The expected worksheets of the Excel file are: - `classes`: a table with all resource classes intended to be used in the resulting JSON - `class1`, `class2`,...: a table for each resource class named after its name -The Excel sheet must have the following structure. - -The worksheet called `classes` has the following structure: +The worksheet called `classes` must have the following structure: ![img-resources-example-1.png](assets/images/img-resources-example-1.png) The expected columns are: -- `name` : The name of the resource -- `en`, `de`, `fr`, `it` : The labels of the resource in different languages, at least one language has to be provided -- `comment_en`, `comment_de`, `comment_fr`, `comment_it`: optional comments in the respective language -- `super` : The base class of the resource +- `name` (mandatory): The name of the resource +- `en`, `de`, `fr`, `it`, `rm`: The labels of the resource in different languages, at least one language has to be provided +- `comment_en`, `comment_de`, `comment_fr`, `comment_it`, `comment_rm` (optional): comments in the respective language +- `super` (mandatory): The base class(es) of the resource, separated by commas + +The optional columns may be omitted in the Excel. All other worksheets, one for each resource class, have the following structure: ![img-resources-example-2.png](assets/images/img-resources-example-2.png){ width=50% } The expected columns are: -- `Property` : The name of the property -- `Cardinality` : The cardinality, one of: `1`, `0-1`, `1-n`, `0-n` +- `Property` (mandatory): The name of the property +- `Cardinality` (mandatory): The cardinality, one of: `1`, `0-1`, `1-n`, `0-n` The GUI order is given by the order in which the properties are listed in the Excel sheet. @@ -58,15 +58,18 @@ The Excel sheet must have the following structure: The expected columns are: -- `name` : The name of the property -- `super` : The base property of the property -- `object` : If the property is derived from `hasValue`, the type of the property must be further specified by the +- `name` (mandatory): The name of the property +- `super` (mandatory): The base property/ies of the property, separated by commas +- `object` (mandatory): If the property is derived from `hasValue`, the type of the property must be further specified by the object it takes, e.g. `TextValue`, `ListValue`, or `IntValue`. If the property is derived from `hasLinkTo`, the `object` specifies the resource class that this property refers to. -- `en`, `de`, `fr`, `it` : The labels of the property in different languages, at least one language has to be provided -- `comment_en`, `comment_de`, `comment_fr`, `comment_it`: optional comments in the respective language -- `gui_element` : The GUI element for the property -- `hlist` : In case of list values: the name of the list +- `en`, `de`, `fr`, `it`, `rm`: The labels of the property in different languages, at least one language has to be provided +- `comment_en`, `comment_de`, `comment_fr`, `comment_it`, `comment_rm` (optional): comments in the respective language +- `gui_element` (mandatory): The GUI element for the property +- `gui_attributes` (optional): The gui_attributes in the form "attr: value, attr: value". + +The optional columns may be omitted in the Excel. +For backwards compatibility, files containing a column `hlist` are valid, but deprecated. For further information about properties, see [here](./dsp-tools-create-ontologies.md#properties). diff --git a/knora/dsplib/schemas/lists-only.json b/knora/dsplib/schemas/lists-only.json index a865a754f..c969a2ca0 100644 --- a/knora/dsplib/schemas/lists-only.json +++ b/knora/dsplib/schemas/lists-only.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://dasch.swiss/schema/lists.json", "title": "JSON schema for DSP lists", "description": "JSON schema for the lists section used in DSP ontologies", diff --git a/knora/dsplib/schemas/ontology.json b/knora/dsplib/schemas/ontology.json index 5a5829c1c..ac537eb90 100644 --- a/knora/dsplib/schemas/ontology.json +++ b/knora/dsplib/schemas/ontology.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://dasch.swiss/schema/ontology.json", "title": "JSON schema for DSP ontologies", "description": "JSON schema for DSP ontologies", diff --git a/knora/dsplib/schemas/properties-only.json b/knora/dsplib/schemas/properties-only.json index 0622af5aa..ce3dd39df 100644 --- a/knora/dsplib/schemas/properties-only.json +++ b/knora/dsplib/schemas/properties-only.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://dasch.swiss/schema/properties-only.json", "title": "JSON schema for properties used in DSP ontologies", "description": "JSON schema for the properties section used in DSP ontologies", @@ -82,6 +82,7 @@ "ListValue", "Region", "Resource", + "Representation", "Annotation" ] }, diff --git a/knora/dsplib/schemas/resources-only.json b/knora/dsplib/schemas/resources-only.json index b80c0c74e..b9ee60b51 100644 --- a/knora/dsplib/schemas/resources-only.json +++ b/knora/dsplib/schemas/resources-only.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://dasch.swiss/schema/resources-only.json", "title": "JSON schema for resources used in DSP ontologies", "description": "JSON schema for the resources section used in DSP ontologies", diff --git a/knora/dsplib/utils/excel_to_json_properties.py b/knora/dsplib/utils/excel_to_json_properties.py index 2172cc2ed..822ba9584 100644 --- a/knora/dsplib/utils/excel_to_json_properties.py +++ b/knora/dsplib/utils/excel_to_json_properties.py @@ -1,12 +1,17 @@ import json import os +import re from typing import Any import jsonschema -from openpyxl import load_workbook +import pandas as pd +from knora.dsplib.utils.excel_to_json_resources import prepare_dataframe -def validate_properties_with_schema(json_file: str) -> bool: +languages = ["en", "de", "fr", "it", "rm"] + + +def _validate_properties_with_schema(json_file: str) -> bool: """ This function checks if the json properties are valid according to the schema. @@ -18,7 +23,7 @@ def validate_properties_with_schema(json_file: str) -> bool: """ current_dir = os.path.dirname(os.path.realpath(__file__)) - with open(os.path.join(current_dir, '../schemas/properties-only.json')) as schema: + with open(os.path.join(current_dir, "../schemas/properties-only.json")) as schema: properties_schema = json.load(schema) try: @@ -26,79 +31,88 @@ def validate_properties_with_schema(json_file: str) -> bool: except jsonschema.exceptions.ValidationError as err: print(err) return False - print('Properties data passed schema validation.') + print("Properties data passed schema validation.") return True -def properties_excel2json(excelfile: str, outfile: str) -> list[dict[str, Any]]: +def _row2prop(row: pd.Series, row_count: int, excelfile: str) -> dict[str, Any]: """ - Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology + Takes a row from a pandas DataFrame, reads its content, and returns a dict object of the property Args: - excelfile: path to the Excel file containing the properties - outfile: path to the output JSON file containing the properties section for the ontology + row: row from a pandas DataFrame that defines a property + row_count: row number of Excel file + excelfile: name of the original excel file Returns: - List(JSON): a list with a dict (JSON) for each row in the Excel file + dict object of the property """ - # load file - wb = load_workbook(filename=excelfile, read_only=True) - sheet = wb.worksheets[0] - props = [row_to_prop(row) for row in sheet.iter_rows(min_row=2, values_only=True, max_col=13)] - - prefix = '"properties":' - - if validate_properties_with_schema(json.loads(json.dumps(props, indent=4))): - # write final list to JSON file if list passed validation - with open(file=outfile, mode='w+', encoding='utf-8') as file: - file.write(prefix) - json.dump(props, file, indent=4) - print('Properties file was created successfully and written to file:', outfile) - else: - print('Properties data is not valid according to schema.') - - return props - -def row_to_prop(row: tuple[str, str, str, str, str, str, str, str, str, str, str, str, str]) -> dict[str, Any]: + name = row["name"] + supers = [s.strip() for s in row["super"].split(",")] + _object = row["object"] + labels = {lang: row[lang] for lang in languages if row.get(lang)} + comments = {lang: row[f"comment_{lang}"] for lang in languages if row.get(f"comment_{lang}")} + gui_element = row["gui_element"] + + gui_attributes = dict() + if row.get("hlist"): + gui_attributes["hlist"] = row["hlist"] + if row.get("gui_attributes"): + pairs = row["gui_attributes"].split(",") + for pair in pairs: + if pair.count(":") != 1: + raise ValueError(f"Row {row_count} of Excel file {excelfile} contains invalid data in column " + f"'gui_attributes'. The expected format is 'attribute: value[, attribute: value]'.") + attr, val = [x.strip() for x in pair.split(":")] + if re.search(r"^\d+\.\d+$", val): + val = float(val) + elif re.search(r"^\d+$", val): + val = int(val) + gui_attributes[attr] = val + + # build the dict structure of this property and append it to the list of properties + _property = { + "name": name, + "super": supers, + "object": _object, + "labels": labels} + if comments: + _property["comments"] = comments + _property["gui_element"] = gui_element + if gui_attributes: + _property["gui_attributes"] = gui_attributes + + return _property + + +def properties_excel2json(excelfile: str, outfile: str) -> None: """ - Parses the row of an Excel sheet and makes a property from it + Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology Args: - row: the row of an Excel sheet + excelfile: path to the Excel file containing the properties + outfile: path to the output JSON file containing the properties section for the ontology Returns: - prop (JSON): the property in JSON format + None """ - name, super_, object_, en, de, fr, it, comment_en, comment_de, comment_fr, comment_it, gui_element, hlist = row - labels = {} - if en: - labels['en'] = en - if de: - labels['de'] = de - if fr: - labels['fr'] = fr - if it: - labels['it'] = it - if not labels: - raise Exception(f"No label given in any of the four languages: {name}") - comments = {} - if comment_en: - comments['en'] = comment_en - if comment_de: - comments['de'] = comment_de - if comment_fr: - comments['fr'] = comment_fr - if comment_it: - comments['it'] = comment_it - prop = { - 'name': name, - 'super': [super_], - 'object': object_, - 'labels': labels, - 'comments': comments, - 'gui_element': gui_element - } - if hlist: - prop['gui_attributes'] = {'hlist': hlist} - return prop + + # load file + df: pd.DataFrame = pd.read_excel(excelfile) + df = prepare_dataframe( + df=df, + required_columns=["name", "super", "object", "gui_element"], + location_of_sheet=f"File '{excelfile}'") + + # transform every row into a property + props = [_row2prop(row, i, excelfile) for i, row in df.iterrows()] + + # write final list to JSON file if list passed validation + if _validate_properties_with_schema(json.loads(json.dumps(props, indent=4))): + with open(file=outfile, mode="w+", encoding="utf-8") as file: + file.write('"properties": ') + json.dump(props, file, indent=4) + print("Properties file was created successfully and written to file: ", outfile) + else: + print("Properties data is not valid according to schema.") diff --git a/knora/dsplib/utils/excel_to_json_resources.py b/knora/dsplib/utils/excel_to_json_resources.py index d83e81342..c5d5641e5 100644 --- a/knora/dsplib/utils/excel_to_json_resources.py +++ b/knora/dsplib/utils/excel_to_json_resources.py @@ -1,13 +1,15 @@ import json import os +import re from typing import Any import jsonschema -from openpyxl import load_workbook -from openpyxl.workbook.workbook import Workbook +import pandas as pd +languages = ["en", "de", "fr", "it", "rm"] -def validate_resources_with_schema(json_file: str) -> bool: + +def _validate_resources_with_schema(json_file: str) -> bool: """ This function checks if the json resources are valid according to the schema. @@ -16,10 +18,9 @@ def validate_resources_with_schema(json_file: str) -> bool: Returns: True if the data passed validation, False otherwise - """ current_dir = os.path.dirname(os.path.realpath(__file__)) - with open(os.path.join(current_dir, '../schemas/resources-only.json')) as schema: + with open(os.path.join(current_dir, "../schemas/resources-only.json")) as schema: resources_schema = json.load(schema) try: @@ -27,90 +28,117 @@ def validate_resources_with_schema(json_file: str) -> bool: except jsonschema.exceptions.ValidationError as err: print(err) return False - print('Resource data passed schema validation.') + print("Resource data passed schema validation.") return True -def resources_excel2json(excelfile: str, outfile: str) -> None: +def prepare_dataframe(df: pd.DataFrame, required_columns: list[str], location_of_sheet: str) -> pd.DataFrame: """ - Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology + Takes a pandas DataFrame, strips the column headers from whitespaces and transforms them to lowercase, + strips every cell from whitespaces and inserts "" if there is no string in it, and deletes the rows that don't have + a value in one of the required cells. Args: - excelfile: path to the Excel file containing the properties - outfile: path to the output JSON file containing the properties section for the ontology + df: pandas DataFrame + required_columns: headers of the columns where a value is required + location_of_sheet: for better error messages, provide this information of the caller Returns: - None + prepared DataFrame """ - # load file - wb = load_workbook(excelfile, read_only=True) + any_char_regex = r"[\wäàçëéèêïöôòüÄÀÇËÉÊÏÖÔÒÜ]" + + # strip column headers and transform to lowercase, so that the script doesn't break when the headers vary a bit + new_df = df.rename(columns=lambda x: x.strip().lower()) + required_columns = [x.strip().lower() for x in required_columns] + # strip every cell, and insert "" if there is no valid word in it + new_df = new_df.applymap(lambda x: str(x).strip() if pd.notna(x) and re.search(any_char_regex, str(x), flags=re.IGNORECASE) else "") + # delete rows that don't have the required columns + for req in required_columns: + if req not in new_df: + raise ValueError(f"{location_of_sheet} requires a column named '{req}'") + new_df = new_df[pd.notna(new_df[req])] + new_df = new_df[[bool(re.search(any_char_regex, x, flags=re.IGNORECASE)) for x in new_df[req]]] + if len(new_df) < 1: + raise ValueError(f"{location_of_sheet} requires at least one row") + return new_df + + +def _row2resource(row: pd.Series, excelfile: str) -> dict[str, Any]: + """ + Method that takes a row from a pandas DataFrame, reads its content, and returns a dict object of the resource - # get overview - sheet = wb['classes'] - resource_list = [c for c in sheet.iter_rows(min_row=2, values_only=True)] + Args: + row: row from a pandas DataFrame that defines a resource + excelfile: Excel file where the data comes from - prefix = '"resources":' - resources = [_extract_row(res, wb) for res in resource_list] + Returns: + dict object of the resource + """ - if validate_resources_with_schema(json.loads(json.dumps(resources, indent=4))): - # write final list to JSON file if list passed validation - with open(file=outfile, mode='w+', encoding='utf-8') as file: - file.write(prefix) - json.dump(resources, file, indent=4) - print('Resource file was created successfully and written to file:', outfile) - else: - print('Resource data is not valid according to schema.') - - -def _extract_row(row: tuple[str, str, str, str, str, str, str, str, str, str], wb: Workbook) -> dict[str, Any]: - """build a property dict from a row of the excel file""" - # get name - name = row[0] - # get labels - labels = {} - if row[1]: - labels['en'] = row[1] - if row[2]: - labels['de'] = row[2] - if row[3]: - labels['fr'] = row[3] - if row[4]: - labels['it'] = row[4] - # get comments - comments = {} - if row[5]: - comments['en'] = row[5] - if row[6]: - comments['de'] = row[6] - if row[7]: - comments['fr'] = row[7] - if row[8]: - comments['it'] = row[8] - # get super - sup = row[9] - - # load details for this resource - sh = wb[name] - property_list = [c for c in sh.iter_rows(min_row=2, values_only=True)] + name = row["name"] + labels = {lang: row[lang] for lang in languages if row.get(lang)} + comments = {lang: row[f"comment_{lang}"] for lang in languages if row.get(f"comment_{lang}")} + supers = [s.strip() for s in row["super"].split(",")] + + # load the cardinalities of this resource + details_df = pd.read_excel(excelfile, sheet_name=name) + details_df = prepare_dataframe( + df=details_df, + required_columns=["Property", "Cardinality"], + location_of_sheet=f"Sheet '{name}' in file '{excelfile}'" + ) cards = [] - # for each of the detail sheets - for i, prop in enumerate(property_list): - # get name and cardinality. - # GUI-order is equal to order in the sheet. + for j, detail_row in details_df.iterrows(): property_ = { - "propname": ":" + prop[0], - "cardinality": str(prop[1]), - "gui_order": i + 1 + "propname": ":" + detail_row["property"], + "cardinality": detail_row["cardinality"].lower(), + "gui_order": j + 1 # gui_order is equal to order in the sheet } cards.append(property_) - # return resource dict - return { + # build the dict structure of this resource and append it to the list of resources + resource = { "name": name, - "labels": labels, - "comments": comments, - "super": sup, - "cardinalities": cards + "super": supers, + "labels": labels } + if comments: + resource["comments"] = comments + resource["cardinalities"] = cards + + return resource + + +def resources_excel2json(excelfile: str, outfile: str) -> None: + """ + Converts properties described in an Excel file into a properties section which can be integrated into a DSP ontology + + Args: + excelfile: path to the Excel file containing the properties + outfile: path to the output JSON file containing the properties section for the ontology + + Returns: + None + """ + + # load file + all_classes_df: pd.DataFrame = pd.read_excel(excelfile) + all_classes_df = prepare_dataframe( + df=all_classes_df, + required_columns=["name", "super"], + location_of_sheet=f"Sheet 'classes' in file '{excelfile}'") + + # transform every row into a resource + resources = [_row2resource(row, excelfile) for i, row in all_classes_df.iterrows()] + + # write final list of all resources to JSON file, if list passed validation + if _validate_resources_with_schema(json.loads(json.dumps(resources, indent=4))): + with open(file=outfile, mode="w+", encoding="utf-8") as file: + file.write('"resources": ') + json.dump(resources, file, indent=4) + print("Resource file was created successfully and written to file ", outfile) + else: + print("Resource data is not valid according to schema.") diff --git a/requirements.txt b/requirements.txt index 39f4749e2..c72184fc8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,15 +12,19 @@ jsonpath-ng~=1.5.3 jsonschema~=4.6.0 lxml~=4.9.0 networkx~=2.8.4 +numpy~=1.22.4 openpyxl~=3.0.10 +pandas~=1.4.2 ply~=3.11 pyparsing~=2.4.7 pyrsistent~=0.18.1 pystrict~=1.2 +python-dateutil~=2.8.2 +pytz~=2022.1 rdflib~=6.1.1 requests~=2.28.0 rfc3987~=1.3.8 -setuptools~=62.4.0 +setuptools~=62.6.0 six~=1.16.0 urllib3~=1.26.9 validators~=0.20.0 diff --git a/setup.py b/setup.py index 6721ac798..d25bb0daa 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ "Operating System :: OS Independent", ], python_requires='>=3.9.0', - install_requires=['argparse~=1.4.0', "attrs~=21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "certifi~=2022.6.15; python_version >= '3.6'", "charset-normalizer~=2.0.12; python_version >= '3.5'", 'click~=8.1.3', "decorator~=5.1.1; python_version >= '3.5'", "et-xmlfile~=1.1.0; python_version >= '3.6'", "idna~=3.3; python_version >= '3.5'", 'isodate~=0.6.1', 'jsonpath-ng~=1.5.3', 'jsonschema~=4.6.0', 'lxml~=4.9.0', 'networkx~=2.8.4', 'openpyxl~=3.0.10', 'ply~=3.11', 'pyparsing~=2.4.7', "pyrsistent~=0.18.1; python_version >= '3.7'", 'pystrict~=1.2', 'rdflib~=6.1.1', 'requests~=2.28.0', 'rfc3987~=1.3.8', "setuptools~=62.4.0; python_version >= '3.7'", "six~=1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "urllib3~=1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 'validators~=0.20.0' + install_requires=['argparse~=1.4.0', "attrs~=21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "certifi~=2022.6.15; python_version >= '3.6'", "charset-normalizer~=2.0.12; python_version >= '3.5'", 'click~=8.1.3', "decorator~=5.1.1; python_version >= '3.5'", "et-xmlfile~=1.1.0; python_version >= '3.6'", "idna~=3.3; python_version >= '3.5'", 'isodate~=0.6.1', 'jsonpath-ng~=1.5.3', 'jsonschema~=4.6.0', 'lxml~=4.9.0', 'networkx~=2.8.4', "numpy~=1.22.4; python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'", 'openpyxl~=3.0.10', 'pandas~=1.4.2', 'ply~=3.11', 'pyparsing~=2.4.7', "pyrsistent~=0.18.1; python_version >= '3.7'", 'pystrict~=1.2', "python-dateutil~=2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 'pytz~=2022.1', 'rdflib~=6.1.1', 'requests~=2.28.0', 'rfc3987~=1.3.8', "setuptools~=62.6.0; python_version >= '3.7'", "six~=1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "urllib3~=1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 'validators~=0.20.0' ], entry_points={ 'console_scripts': [ diff --git a/test/e2e/test_tools.py b/test/e2e/test_tools.py index d13ff3328..28135e55b 100644 --- a/test/e2e/test_tools.py +++ b/test/e2e/test_tools.py @@ -157,14 +157,11 @@ def test_excel_to_json_list(self) -> None: def test_excel_to_json_resources(self) -> None: resources_excel2json(excelfile='testdata/Resources.xlsx', - outfile='testdata/tmp/_resources-out.json') + outfile='testdata/tmp/_out_resources.json') def test_excel_to_json_properties(self) -> None: properties_excel2json(excelfile='testdata/Properties.xlsx', - outfile='testdata/tmp/_properties-out.json') - - def test_validate_ontology(self) -> None: - validate_ontology(self.test_onto_file) + outfile='testdata/tmp/_out_properties.json') def test_create_ontology(self) -> None: create_ontology(input_file=self.test_onto_file, diff --git a/test/unittests/test_excel_to_properties.py b/test/unittests/test_excel_to_properties.py new file mode 100644 index 000000000..fc7311dcd --- /dev/null +++ b/test/unittests/test_excel_to_properties.py @@ -0,0 +1,109 @@ +"""unit tests for excel to properties""" +import os +import unittest +import json +import jsonpath_ng +import jsonpath_ng.ext + +from knora.dsplib.utils import excel_to_json_properties as e2j + + +class TestExcelToProperties(unittest.TestCase): + + def setUp(self) -> None: + """Is executed before each test""" + os.makedirs("testdata/tmp", exist_ok=True) + + def test_excel2json(self) -> None: + excelfile = "testdata/Properties.xlsx" + outfile = "testdata/tmp/_out_properties.json" + e2j.properties_excel2json(excelfile, outfile) + + # define the expected values from the excel file + excel_names = ["correspondsToGenericAnthroponym", "hasAnthroponym", "hasGender", "isDesignatedAs", "hasTitle", + "hasStatus", "hasLifeYearAmount", "hasBirthDate", "hasGeometry", "hasRepresentation", + "hasRemarks", "hasTerminusPostQuem", "hasGND", "hasColor", "hasDecimal", "hasTime", + "hasInterval", "hasBoolean", "hasGeoname", "partOfDocument"] + excel_supers = [["hasLinkTo"], ["hasValue", "dcterms:creator"], ["hasValue"], ["hasValue"], ["hasLinkTo"], + ["hasValue"], ["hasValue"], ["hasValue"], ["hasGeometry"], ["hasRepresentation"], + ["hasValue", "dcterms:description"], ["hasValue"],["hasValue"], ["hasColor"], ["hasValue"], + ["hasValue"], ["hasValue"], ["hasValue"], ["hasValue"], ["isPartOf"]] + excel_objects = [":GenericAnthroponym", "TextValue", "ListValue", "ListValue", ":Titles", "ListValue", + "IntValue", "DateValue", "GeomValue", "Representation", "TextValue", "DateValue", "UriValue", + "ColorValue", "DecimalValue", "TimeValue", "IntervalValue", "BooleanValue", "GeonameValue", + ":Documents"] + + excel_labels = dict() + excel_labels["de"] = ["", "only German", "", "", "", "", "", "", "Geometrisches Objekt", + "hat eine Multimediadatei", "", "", "GND", "Farbe", "Dezimalzahl", "Zeit", + "Zeitintervall", "Bool'sche Variable", "Link zu Geonames", "ist Teil eines Dokuments"] + excel_labels["it"] = ["", "", "", "only Italian", "", "", "", "", "", "", "", "", "GND", "", "", "", "", "", "", ""] + + excel_comments = dict() + excel_comments["comment_fr"] = ["J'avais déjà examiné plusieurs propriétés quand, un jour, le notaire, qui me " + "donnait des indications nécessaires pour une de mes explorations, me dit :", + "Un étrange hasard m'a mis en possession de ce journal.", + "Je n'en sais rien du tout ; mais si vous voulez la voir, monsieur, voici les " + "indications précises pour la trouver.", + "Vous devrez arranger l'affaire avec le curé du village de --.\"", + "Un étrange hasard m'a mis en possession de ce journal.", "", "", "only French", "", "", + "", "J'avais déjà examiné plusieurs propriétés quand, un jour, le notaire, qui me " + "donnait des indications nécessaires pour une de mes explorations, me dit :", + "Gemeinsame Normdatei", "", "Chiffre décimale", "Temps", "", "", "", ""] + excel_comments["comment_it"] = ["Avevo già visto diverse proprietà quando un giorno il notaio,", + "Uno strano caso mi mise in possesso di questo diario.", + "Non ne so nulla; ma se volete vederla, signore, eccovi le indicazioni precise per trovarla.", + "Dovrete organizzare l'affare con il curato del villaggio di --\".", + "Uno strano caso mi mise in possesso di questo diario.", "", "", "", "only Italian", + "", "", "Avevo già visto diverse proprietà quando un giorno il notaio,", + "Gemeinsame Normdatei", "", "", "", "", "", "", ""] + + excel_gui_elements = ["Searchbox", "Richtext", "List", "Radio", "Searchbox", "List", "Spinbox", "Date", + "SimpleText", "Searchbox", "Textarea", "Date", "SimpleText", "Colorpicker", "Slider", + "TimeStamp", "Interval", "Checkbox", "Geonames", "Searchbox"] + + excel_gui_attributes_hasGender = {"hlist": "gender"} + excel_gui_attributes_hasGND = {"size": 100} + excel_gui_attributes_hasDecimal = {"min": 0.0, "max": 100.0} + + # read json file + with open(outfile) as f: + json_string = f.read() + json_string = "{" + json_string + "}" + json_file = json.loads(json_string) + + # extract infos from json file + json_names = [match.value for match in jsonpath_ng.parse("$.properties[*].name").find(json_file)] + json_supers = [match.value for match in jsonpath_ng.parse("$.properties[*].super").find(json_file)] + json_objects = [match.value for match in jsonpath_ng.parse("$.properties[*].object").find(json_file)] + + json_labels_all = [match.value for match in jsonpath_ng.parse("$.properties[*].labels").find(json_file)] + json_labels: dict[str, list[str]] = dict() + for lang in ["de", "it"]: + json_labels[lang] = [label.get(lang, "").strip() for label in json_labels_all] + + json_comments: dict[str, list[str]] = dict() + for lang in ["fr", "it"]: + json_comments[f"comment_{lang}"] = [resource.get("comments", {}).get(lang, "").strip() + for resource in json_file["properties"]] + + json_gui_elements = [match.value for match in jsonpath_ng.parse("$.properties[*].gui_element").find(json_file)] + + json_gui_attributes_hasGender = jsonpath_ng.ext.parse("$.properties[?name='hasGender'].gui_attributes").find(json_file)[0].value + json_gui_attributes_hasGND = jsonpath_ng.ext.parse("$.properties[?name='hasGND'].gui_attributes").find(json_file)[0].value + json_gui_attributes_hasDecimal = jsonpath_ng.ext.parse("$.properties[?name='hasDecimal'].gui_attributes").find(json_file)[0].value + + # make checks + self.assertListEqual(excel_names, json_names) + self.assertListEqual(excel_supers, json_supers) + self.assertListEqual(excel_objects, json_objects) + self.assertDictEqual(excel_labels, json_labels) + self.assertDictEqual(excel_comments, json_comments) + self.assertListEqual(excel_gui_elements, json_gui_elements) + self.assertDictEqual(excel_gui_attributes_hasGND, json_gui_attributes_hasGND) + self.assertDictEqual(excel_gui_attributes_hasDecimal, json_gui_attributes_hasDecimal) + self.assertDictEqual(excel_gui_attributes_hasGender, json_gui_attributes_hasGender) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/unittests/test_excel_to_resource.py b/test/unittests/test_excel_to_resource.py index 3cda7a782..04a63db2e 100644 --- a/test/unittests/test_excel_to_resource.py +++ b/test/unittests/test_excel_to_resource.py @@ -1,8 +1,11 @@ """unit tests for excel to resource""" import os import unittest - -from openpyxl import Workbook +import json +import jsonpath_ng +import jsonpath_ng.ext +import pandas as pd +import numpy as np from knora.dsplib.utils import excel_to_json_resources as e2j @@ -11,52 +14,103 @@ class TestExcelToResource(unittest.TestCase): def setUp(self) -> None: """Is executed before each test""" - os.makedirs('testdata/tmp', exist_ok=True) + os.makedirs("testdata/tmp", exist_ok=True) - def test_excel2json(self) -> None: - in_file = "testdata/Resources.xlsx" - out_file = "testdata/tmp/_out_res.json" - e2j.resources_excel2json(in_file, out_file) - self.assertTrue(os.path.exists(out_file)) - - def test_extract_row(self) -> None: - wb = Workbook() - ws_classes = wb.create_sheet("classes") - res_name = "ClassA" - row = ( - res_name, - "Class A", - "", - "", - "", - "A comment on Class A", - "", - "", - "", - "Resource", + def test_prepare_dataframe(self) -> None: + original_df = pd.DataFrame({ + " TitLE of Column 1 ": ["1", " 0-1 ", "1-n ", pd.NA, " ", " ", "", " 0-n ", np.nan], + " Title of Column 2 ": [None, "1", 1, "text", "text", "text", "text", "text", "text"], + "Title of Column 3": ["", pd.NA, None, "text", "text", "text", "text", np.nan, "text"] + }) + expected_df = pd.DataFrame({ + "title of column 1": [ "0-1", "1-n", "0-n"], + "title of column 2": [ "1", "1", "text"], + "title of column 3": [ "", "", ""] + }) + returned_df = e2j.prepare_dataframe( + df=original_df, + required_columns=[" TitLE of Column 1 ", " Title of Column 2 "], + location_of_sheet='' ) - for i, c in enumerate(row): - ws_classes.cell(row=2, column=i+1, value=c) - ws_class_a = wb.create_sheet(res_name) - ws_class_a["A2"] = "property1" - ws_class_a["B2"] = "1" - resource_dict = e2j._extract_row(row, wb) - expected_dict = { - 'name': 'ClassA', - 'labels': { - 'en': 'Class A' - }, - 'comments': { - 'en': 'A comment on Class A' - }, - 'super': 'Resource', - 'cardinalities': [{ - 'propname': ':property1', - 'cardinality': '1', - 'gui_order': 1 - }]} - self.assertDictEqual(resource_dict, expected_dict) - - -if __name__ == '__main__': + for expected, returned in zip(expected_df.iterrows(), returned_df.iterrows()): + _, expected_row = expected + _, returned_row = returned + self.assertListEqual(list(expected_row), list(returned_row)) + + def test_excel2json(self) -> None: + excelfile = "testdata/Resources.xlsx" + outfile = "testdata/tmp/_out_resources.json" + e2j.resources_excel2json(excelfile, outfile) + + # define the expected values from the excel file + excel_names = ["Owner", "Title", "GenericAnthroponym", "FamilyMember", "MentionedPerson", "Alias", "Image", + "Video", "Audio", "ZIP", "PDFDocument", "Annotation", "LinkObject", "RegionOfImage"] + excel_supers = [["Resource", "dcterms:fantasy"], ["Resource"], ["Resource"], ["Resource"], ["Resource"], + ["Resource"], ["StillImageRepresentation", "dcterms:image"], ["MovingImageRepresentation"], + ["AudioRepresentation"], ["ArchiveRepresentation"], ["DocumentRepresentation"], ["Annotation"], + ["LinkObj"], ["Region"]] + + excel_labels = dict() + excel_labels["en"] = ["Owner", "Title", "Generic anthroponym", "Family member", "Mentioned person", "Alias", + "Only English", "", "", "", "", "Annotation", "Link Object", "Region of an image"] + excel_labels["rm"] = ["Rumantsch", "Rumantsch", "Rumantsch", "Rumantsch", "Rumantsch", "Rumantsch", "", "", "", + "", "Only Rumantsch", "", "", ""] + excel_labels_of_image = {"en": "Only English"} + + excel_comments = dict() + excel_comments["comment_de"] = ["Ein seltsamer Zufall brachte mich in den Besitz dieses Tagebuchs.", "", + "Only German", "", "", "", "Bild", "Video", "Audio", "ZIP", "PDF-Dokument", + "Annotation", "Linkobjekt", ""] + excel_comments["comment_fr"] = ["Un étrange hasard m'a mis en possession de ce journal.", "", "", "Only French", + "", "", "", "", "", "", "", "", "", ""] + excel_comments_of_image = {"en": "Image", "de": "Bild"} + + excel_first_class_properties = [":hasAnthroponym", ":isOwnerOf", ":correspondsToGenericAnthroponym", ":hasAlias", + ":hasGender", ":isDesignatedAs", ":hasTitle", ":hasStatus", + ":hasFamilyRelationTo",":hasLifeYearAmount", ":hasBirthDate", ":hasDeathDate", + ":hasBibliography", ":hasRemarks"] + excel_first_class_cardinalities = ["1", "0-1", "0-n", "1", "0-n", "0-1", "1-n", "0-1", "1-n", "0-1", "0-1", + "0-1", "1-n", "1-n"] + + # read json file + with open(outfile) as f: + json_string = f.read() + json_string = "{" + json_string + "}" + json_file = json.loads(json_string) + + # extract infos from json file + json_names = [match.value for match in jsonpath_ng.parse("$.resources[*].name").find(json_file)] + json_supers = [match.value for match in jsonpath_ng.parse("$.resources[*].super").find(json_file)] + + json_labels_all = [match.value for match in jsonpath_ng.parse("$.resources[*].labels").find(json_file)] + json_labels: dict[str, list[str]] = dict() + for lang in ["en", "rm"]: + json_labels[lang] = [label.get(lang, "").strip() for label in json_labels_all] + json_labels_of_image = jsonpath_ng.ext.parse('$.resources[?name="Image"].labels').find(json_file)[0].value + + json_comments: dict[str, list[str]] = dict() + for lang in ["de", "fr"]: + # make sure the lists of the json comments contain a blank string even if there is no "comments" section + # at all in this resource + json_comments[f"comment_{lang}"] = [resource.get("comments", {}).get(lang, "").strip() + for resource in json_file["resources"]] + json_comments_of_image = jsonpath_ng.ext.parse('$.resources[?name="Image"].comments').find(json_file)[0].value + + json_first_class_properties = [match.value for match in + jsonpath_ng.parse("$.resources[0].cardinalities[*].propname").find(json_file)] + json_first_class_cardinalities = [match.value for match in + jsonpath_ng.parse("$.resources[0].cardinalities[*].cardinality").find(json_file)] + + # make checks + self.assertListEqual(excel_names, json_names) + self.assertListEqual(excel_supers, json_supers) + self.assertDictEqual(excel_labels, json_labels) + self.assertDictEqual(excel_labels_of_image, json_labels_of_image) + self.assertDictEqual(excel_comments, json_comments) + self.assertDictEqual(excel_comments_of_image, json_comments_of_image) + self.assertListEqual(excel_first_class_properties, json_first_class_properties) + self.assertListEqual(excel_first_class_cardinalities, json_first_class_cardinalities) + + +if __name__ == "__main__": unittest.main() diff --git a/testdata/Properties.xlsx b/testdata/Properties.xlsx index cccc21a0a..ac867acac 100644 Binary files a/testdata/Properties.xlsx and b/testdata/Properties.xlsx differ diff --git a/testdata/Resources.xlsx b/testdata/Resources.xlsx index 662e4cf55..5328f9af0 100644 Binary files a/testdata/Resources.xlsx and b/testdata/Resources.xlsx differ