From 0bc6149b7d0c92fcc0759f4b7161682896542c58 Mon Sep 17 00:00:00 2001 From: irinaschubert Date: Thu, 23 Sep 2021 18:33:07 +0200 Subject: [PATCH] feat(schema): add error codes for validation (DSP-1902) (#101) * use validate instead of assertValid for schema validation * add documentation for json schema * remove namespace in in put xml for processing * Update dsp-tools-create.md * add documentation for xsd schema reference * update examples in documentation * add exit codes and error messages when validation fails --- docs/dsp-tools-create.md | 43 +++++++++------------- docs/dsp-tools-xmlupload.md | 28 +++++++------- knora/dsp_tools.py | 5 ++- knora/dsplib/utils/onto_create_lists.py | 2 +- knora/dsplib/utils/onto_create_ontology.py | 2 +- knora/dsplib/utils/onto_validate.py | 4 +- knora/dsplib/utils/xml_upload.py | 26 +++++++++++-- 7 files changed, 62 insertions(+), 48 deletions(-) diff --git a/docs/dsp-tools-create.md b/docs/dsp-tools-create.md index c9a53e9d4..fc3a7a783 100644 --- a/docs/dsp-tools-create.md +++ b/docs/dsp-tools-create.md @@ -19,10 +19,6 @@ definition and a short example of the definition. ## A short overview -In the following section, you find all the mentioned parts with a detailed explanation. Right at the beginning we look -at the basic fields that belong to an ontology definition. This serves as an overview for you to which you can return at -any time while you read the description. - A complete data model definition looks like this: ```json @@ -31,6 +27,7 @@ A complete data model definition looks like this: "foaf": "http://xmlns.com/foaf/0.1/", "dcterms": "http://purl.org/dc/terms/" }, + "$schema": "https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/ontology.json", "project": { "shortcode": "0123", "shortname": "BiZ", @@ -57,22 +54,14 @@ A complete data model definition looks like this: } ``` -As you can see, only two umbrella terms define our ontology: the "prefixes" object and the "project" object. 
In the -following we take a deeper look into both of them since, as you can see in the example above, both objects have further -fine-grained definition levels. - -### "Prefixes" object +### "prefixes" object `"prefixes": { "prefix": "", ...}` -The "prefixes" object contains - as you may already have guessed by the name - the `prefixes` of *external* ontologies -that are also used in the current project. All prefixes are composed of a keyword, followed by its iri. This is used as -a shortcut for later so that you don't always have to specify the full qualified iri but can use the much shorter -keyword instead. That means that e.g. instead of addressing a property called "familyname" via -`http://xmlns.com/foaf/0.1/familyName` you can simply use foaf:familyName. - -As you can see in the example below, you can have more than one prefix too. In the example we have "foaf" as well as -"dcterms" as our prefixes. +The `prefixes` object contains the prefixes of external ontologies that are used in the current project. All prefixes +are composed of the actual prefix and an IRI. The prefix is used as an abbreviation so one does not have to write the +full qualified IRI each time it is used. So, instead of writing a property called "familyname" as +`http://xmlns.com/foaf/0.1/familyName` one can simply use `foaf:familyName`. ```json { @@ -83,16 +72,17 @@ As you can see in the example below, you can have more than one prefix too. In t } ``` -### "Project" object +### "$schema" object -`"project": {"key": "", ...}` +The `$schema` object refers to the JSON schema for DSP data model definitions and is mandatory. -Right after the "prefix" object the "project" object has to follow, which contains all resources and properties of the -ontology. The "project" object is the bread and butter of the ontology. All its important properties are specified -therein. 
+`"$schema": "https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/ontology.json"` + +### "project" object + +`"project": {"key": "", ...}` -As you saw in the complete ontology definition in the beginning, the project definitions requires all the following data -fields: +The `project` object contains all resources and properties of the ontology. It requires all the following data fields: - shortcode - shortname @@ -100,14 +90,14 @@ fields: - keywords - ontologies -Whereas the following fields are optional (if one or more of these fields are not used, it must be omitted): +The following fields are optional (if one or more of these fields are not used, they should be omitted): - descriptions - lists - groups - users -So, a simple example definition of the "project" object could look like this: +A simple example definition of the "project" object looks like this: ```json { @@ -1260,6 +1250,7 @@ Finally, here is a complete example of an ontology definition: "foaf": "http://xmlns.com/foaf/0.1/", "dcterms": "http://purl.org/dc/terms/" }, + "$schema": "https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/ontology.json", "project": { "shortcode": "0170", "shortname": "teimp", diff --git a/docs/dsp-tools-xmlupload.md b/docs/dsp-tools-xmlupload.md index d0344695e..77e16c0b2 100644 --- a/docs/dsp-tools-xmlupload.md +++ b/docs/dsp-tools-xmlupload.md @@ -13,24 +13,24 @@ The import file must start with the standard XML header: ## The root element <knora> -The `<knora>` element describes a set of resources that are to be imported. It is the container for an arbitrary number -of `<resource>` elements and may only contain resource and permissions tags. - -The `<knora>` element has the following attributes: +The `<knora>` element describes all resources that should be imported.
It has the following attributes: +- `xmlns`: `"https://dasch.swiss/schema"` (required) - `xmlns:xsi`: `"http://www.w3.org/2001/XMLSchema-instance"` (required) -- `xsi:schemaLocation`: path to the XML schema file for validation (optional) +- `xsi:schemaLocation`: `"https://dasch.swiss/schema https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/data.xsd"` ( + required) - `shortcode`: project shortcode, e.g. "0801" (required) - `default-ontology`: name of the ontology (required) -Thus, the `<knora>` element may look as follows: +The `<knora>` element may look as follows: ```xml + xmlns="https://dasch.swiss/schema" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="https://dasch.swiss/schema https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/data.xsd" + shortcode="0806" + default-ontology="webern"> ... ``` @@ -118,9 +118,11 @@ A complete `` section may look as follows: ```xml + xmlns="https://dasch.swiss/schema" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="https://dasch.swiss/schema https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/knora/dsplib/schemas/data.xsd" + shortcode="0806" + default-ontology="webern"> RV diff --git a/knora/dsp_tools.py b/knora/dsp_tools.py index a28251487..e7ccd6522 100644 --- a/knora/dsp_tools.py +++ b/knora/dsp_tools.py @@ -133,7 +133,10 @@ def program(user_args: list[str]) -> None: dump=args.dump) else: if args.validate: - validate_ontology(args.datamodelfile) + if validate_ontology(args.datamodelfile): + exit(0) + else: + exit(1) else: create_ontology(input_file=args.datamodelfile, lists_file=args.listfile, diff --git a/knora/dsplib/utils/onto_create_lists.py b/knora/dsplib/utils/onto_create_lists.py index 57aa9e86b..b28df282c 100644 --- a/knora/dsplib/utils/onto_create_lists.py +++ b/knora/dsplib/utils/onto_create_lists.py @@ -67,7 +67,7 @@ def create_lists(input_file: str, lists_file: str, server: str, user: str, passw if
validate_ontology(data_model): pass else: - quit() + exit(1) # Connect to the DaSCH Service Platform API con = Connection(server) diff --git a/knora/dsplib/utils/onto_create_ontology.py b/knora/dsplib/utils/onto_create_ontology.py index 88aada94a..38f3a4062 100644 --- a/knora/dsplib/utils/onto_create_ontology.py +++ b/knora/dsplib/utils/onto_create_ontology.py @@ -71,7 +71,7 @@ def create_ontology(input_file: str, if validate_ontology(data_model): pass else: - quit() + exit(1) # make the connection to the server con = login(server=server, diff --git a/knora/dsplib/utils/onto_validate.py b/knora/dsplib/utils/onto_validate.py index f7aa6dac1..3c4154476 100644 --- a/knora/dsplib/utils/onto_validate.py +++ b/knora/dsplib/utils/onto_validate.py @@ -28,7 +28,7 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool data_model = json.loads(onto_json_str) else: print('Input is not valid.') - quit() + exit(1) # expand all lists referenced in the list section of the data model new_lists = expand_lists_from_excel(data_model) @@ -45,7 +45,7 @@ def validate_ontology(input_file_or_json: Union[str, Dict, os.PathLike]) -> bool try: validate(instance=data_model, schema=schema) except jsonschema.exceptions.ValidationError as err: - print(err) + print('Data model did not pass validation. 
The error message is:', err.message) return False print('Data model is syntactically correct and passed validation.') return True diff --git a/knora/dsplib/utils/xml_upload.py b/knora/dsplib/utils/xml_upload.py index 487dade14..eae78765a 100644 --- a/knora/dsplib/utils/xml_upload.py +++ b/knora/dsplib/utils/xml_upload.py @@ -503,12 +503,12 @@ def validate_xml_against_schema(input_file: str, schema_file: str) -> bool: Returns: True if the XML file is valid, False otherwise """ - xmlschema = etree.XMLSchema(schema_file) + xmlschema = etree.XMLSchema(etree.parse(schema_file)) doc = etree.parse(input_file) is_valid = False - if xmlschema.assertValid(doc): + if xmlschema.validate(doc): is_valid = True return is_valid @@ -535,12 +535,15 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s # Validate the input XML file current_dir = os.path.dirname(os.path.realpath(__file__)) - schema_file = etree.parse(os.path.join(current_dir, '../schemas/data.xsd')) + schema_file = os.path.join(current_dir, '../schemas/data.xsd') if validate_xml_against_schema(input_file, schema_file): print("The input data file is syntactically correct and passed validation!") if validate_only: - return True + exit(0) + else: + print("The input data file did not pass validation!") + exit(1) # Connect to the DaSCH Service Platform API and get the project context con = Connection(server) @@ -552,6 +555,21 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s # parse the XML file containing the data tree = etree.parse(input_file) + + # Iterate through all XML elements + for elem in tree.getiterator(): + # Skip comments and processing instructions, + # because they do not have names + if not ( + isinstance(elem, etree._Comment) + or isinstance(elem, etree._ProcessingInstruction) + ): + # Remove a namespace URI in the element's name + elem.tag = etree.QName(elem).localname + + # Remove unused namespace declarations + 
etree.cleanup_namespaces(tree) + knora = tree.getroot() default_ontology = knora.attrib['default-ontology'] shortcode = knora.attrib['shortcode']