Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: xml validation (DEV-1360) #230

Merged
merged 5 commits into from Sep 23, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 22 additions & 21 deletions knora/dsp_tools.py
Expand Up @@ -48,30 +48,29 @@ def program(user_args: list[str]) -> None:
subparsers = parser.add_subparsers(title='Subcommands', description='Valid subcommands are', help='sub-command help')

# create
parser_create = subparsers.add_parser('create', help='Upload an ontology and/or list(s) from a JSON file to the '
'DaSCH Service Platform')
parser_create = subparsers.add_parser('create', help='Upload a project and/or list(s) from a JSON project file to '
'the DaSCH Service Platform')
parser_create.set_defaults(action='create')
parser_create.add_argument('-s', '--server', type=str, default=default_localhost, help=url_text)
parser_create.add_argument('-u', '--user', default=default_user, help=username_text)
parser_create.add_argument('-p', '--password', default=default_pw, help=password_text)
parser_create.add_argument('-V', '--validate-only', action='store_true', help='Do only validation of JSON, no '
'upload of the ontology')
parser_create.add_argument('-V', '--validate-only', action='store_true', help='Only validate the project against '
'the JSON schema, without uploading it')
parser_create.add_argument('-l', '--lists-only', action='store_true', help='Upload only the list(s)')
parser_create.add_argument('-v', '--verbose', action='store_true', help=verbose_text)
parser_create.add_argument('-d', '--dump', action='store_true', help='dump test files for DSP-API requests')
parser_create.add_argument('datamodelfile', help='path to data model file')
parser_create.add_argument('projectfile', help='path to a JSON project file')

# get
parser_get = subparsers.add_parser('get', help='Get the ontology (data model) of a project from the DaSCH Service '
'Platform.')
parser_get = subparsers.add_parser('get', help='Get a project from the DaSCH Service Platform.')
parser_get.set_defaults(action='get')
parser_get.add_argument('-u', '--user', default=default_user, help=username_text)
parser_get.add_argument('-p', '--password', default=default_pw, help=password_text)
parser_get.add_argument('-s', '--server', type=str, default=default_localhost, help=url_text)
parser_get.add_argument('-P', '--project', type=str, help='Shortcode, shortname or iri of project', required=True)
parser_get.add_argument('-v', '--verbose', action='store_true', help=verbose_text)
parser_get.add_argument('datamodelfile', help='Path to the file the ontology should be written to',
default='onto.json')
parser_get.add_argument('projectfile', help='Path to the file the project should be written to',
default='project.json')

# xmlupload
parser_upload = subparsers.add_parser('xmlupload', help='Upload data from an XML file to the DaSCH Service Platform.')
Expand Down Expand Up @@ -101,7 +100,7 @@ def program(user_args: list[str]) -> None:

# excel2resources
parser_excel_resources = subparsers.add_parser('excel2resources', help='Create a JSON file from an Excel file '
'containing resources for a DSP ontology. ')
'containing resources for a DSP ontology. ')
parser_excel_resources.set_defaults(action='excel2resources')
parser_excel_resources.add_argument('excelfile', help='Path to the Excel file containing the resources',
default='resources.xlsx')
Expand Down Expand Up @@ -147,35 +146,37 @@ def program(user_args: list[str]) -> None:
if args.action == 'create':
if args.lists_only:
if args.validate_only:
validate_lists_section_with_schema(path_to_json_project_file=args.datamodelfile)
validate_lists_section_with_schema(path_to_json_project_file=args.projectfile)
print('"Lists" section of the JSON project file is syntactically correct and passed validation.')
exit(0)
else:
create_lists(input_file=args.datamodelfile,
create_lists(input_file=args.projectfile,
server=args.server,
user=args.user,
password=args.password,
dump=args.dump)
else:
if args.validate_only and validate_project(args.datamodelfile):
print('Data model is syntactically correct and passed validation.')
if args.validate_only:
validate_project(args.projectfile)
print('JSON project file is syntactically correct and passed validation.')
exit(0)
else:
create_project(input_file=args.datamodelfile,
create_project(input_file=args.projectfile,
server=args.server,
user_mail=args.user,
password=args.password,
verbose=args.verbose,
dump=args.dump if args.dump else False)
elif args.action == 'get':
get_ontology(project_identifier=args.project,
outfile=args.datamodelfile,
outfile=args.projectfile,
server=args.server,
user=args.user,
password=args.password,
verbose=args.verbose)
elif args.action == 'xmlupload':
if args.validate:
validate_xml_against_schema(input_file=args.xmlfile,
schema_file="knora/dsplib/schemas/data.xsd")
validate_xml_against_schema(input_file=args.xmlfile)
else:
xml_upload(input_file=args.xmlfile,
server=args.server,
Expand All @@ -187,13 +188,13 @@ def program(user_args: list[str]) -> None:
incremental=args.incremental)
elif args.action == 'excel2lists':
excel2lists(excelfolder=args.excelfolder,
outfile=args.outfile)
path_to_output_file=args.outfile)
elif args.action == 'excel2resources':
excel2resources(excelfile=args.excelfile,
outfile=args.outfile)
path_to_output_file=args.outfile)
elif args.action == 'excel2properties':
excel2properties(excelfile=args.excelfile,
outfile=args.outfile)
path_to_output_file=args.outfile)
elif args.action == 'id2iri':
id_to_iri(xml_file=args.xmlfile,
json_file=args.jsonfile,
Expand Down
9 changes: 7 additions & 2 deletions knora/dsplib/utils/onto_create_lists.py
Expand Up @@ -71,10 +71,15 @@ def create_lists(
dump: bool = False
) -> Tuple[dict[str, Any], bool]:
"""
This method uploads the "lists" section of a JSON project definition file to a DSP server. If the JSON project file
is still unparsed, this method parses it, expands the Excel sheets that are referenced, and validates it.
This method uploads the "lists" section of a JSON project definition file to a DSP server. The project must already
exist on the DSP server.

If the JSON project file is passed as "input_file", this method parses it, expands the Excel sheets that are
referenced, and validates it. If it is passed as "project_definition", these preliminary steps are not necessary.

The "lists" section of the parsed project definition is then uploaded to the DSP server. If a list with the same
name already exists in this project on the DSP server, that list is skipped.

Returns a tuple consisting of a dict and a bool. The dict contains the IRIs of the created list nodes. If there are
no lists in the project definition, an empty dictionary is returned. The bool indicates if everything went smoothly
during the process. If a warning or error occurred (e.g. one of the lists already exists, or one of the nodes could
Expand Down
6 changes: 4 additions & 2 deletions knora/dsplib/utils/shared.py
Expand Up @@ -2,6 +2,7 @@
import unicodedata
import pandas as pd
import regex
import os
from lxml import etree
from requests import RequestException
from datetime import datetime
Expand Down Expand Up @@ -87,17 +88,18 @@ def try_network_action(
raise BaseError(failure_msg)


def validate_xml_against_schema(input_file: str, schema_file: str) -> bool:
def validate_xml_against_schema(input_file: str) -> bool:
"""
Validates an XML file against an XSD schema

Args:
input_file: the XML file to be validated
schema_file: the schema against which the XML file should be validated

Returns:
True if the XML file is valid. Otherwise, a BaseError with a detailed error log is raised
"""
current_dir = os.path.dirname(os.path.realpath(__file__))
schema_file = os.path.join(current_dir, "../schemas/data.xsd")
xmlschema = etree.XMLSchema(etree.parse(schema_file))
doc = etree.parse(input_file)

Expand Down
4 changes: 1 addition & 3 deletions knora/dsplib/utils/xml_upload.py
Expand Up @@ -241,10 +241,8 @@ def xml_upload(input_file: str, server: str, user: str, password: str, imgdir: s
"""

# Validate the input XML file
current_dir = os.path.dirname(os.path.realpath(__file__))
schema_file = os.path.join(current_dir, '../schemas/data.xsd')
try:
validate_xml_against_schema(input_file, schema_file)
validate_xml_against_schema(input_file)
except BaseError as err:
print(f"=====================================\n"
f"{err.message}")
Expand Down