From 63b90f3c93ea6894310df2dc5557d2534c128384 Mon Sep 17 00:00:00 2001 From: Ivan Subotic Date: Wed, 5 Jun 2019 18:28:22 +0200 Subject: [PATCH] add bulk upload --- README.md | 2 +- knora/__init__.py | 4 +- knora/create_ontology.py | 8 +-- knora/knora.py | 112 +++++++++++++++++++++++++++++++++------ setup.py | 2 + 5 files changed, 105 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 6ca67628b..a29344d1d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ To update to the latest version run: $ pip3 install --upgrade knora ``` -To install from source, i.e. this repositrory run: +To install from source, i.e. this repository run: ``` $ python3 setup.py install ``` diff --git a/knora/__init__.py b/knora/__init__.py index 099f3567a..32a97b341 100644 --- a/knora/__init__.py +++ b/knora/__init__.py @@ -1,3 +1,5 @@ from .knora import KnoraError -from .knora import knora +from .knora import Knora from .knora import BulkImport +from .knora import IrisLookup +from .knora import ListsLookup diff --git a/knora/create_ontology.py b/knora/create_ontology.py index ed937ba02..5246d922e 100644 --- a/knora/create_ontology.py +++ b/knora/create_ontology.py @@ -4,7 +4,7 @@ import argparse import json from jsonschema import validate -from knora import KnoraError, knora +from knora import KnoraError, Knora def main(): @@ -41,7 +41,7 @@ def main(): exit(0) # create the knora connection object - con = knora(args.server, args.user, args.password, ontology.get("prefixes")) + con = Knora(args.server, args.user, args.password, ontology.get("prefixes")) # bulk_templ = con.create_schema(ontology["project"]["shortcode"], ontology["project"]["ontology"]["name"]) @@ -93,7 +93,6 @@ def main(): "nodes": listnodes } - with open('lists.json', 'w', encoding="utf-8") as fp: json.dump(listrootnodes, fp, indent=3, sort_keys=True) @@ -214,7 +213,7 @@ def main(): con = None # force logout by deleting the connection object. -def list_creator(con: knora, proj_iri: str, list_iri: str, parent_iri: str, nodes: List[dict]): +def list_creator(con: Knora, proj_iri: str, list_iri: str, parent_iri: str, nodes: List[dict]): nodelist = [] for node in nodes: node_id = con.create_list_node( @@ -231,5 +230,6 @@ def list_creator(con: knora, proj_iri: str, list_iri: str, parent_iri: str, node nodelist.append({node["name"]: {"id": node_id}}) return nodelist + if __name__ == '__main__': main() \ No newline at end of file diff --git a/knora/knora.py b/knora/knora.py index 44035cf0f..57359a548 100755 --- a/knora/knora.py +++ b/knora/knora.py @@ -1,5 +1,4 @@ from typing import List, Set, Dict, Tuple, Optional -from pprint import pprint from urllib.parse import quote_plus from rdflib import Graph from lxml import etree @@ -77,8 +76,9 @@ - :Textarea -> cols=integer, rows=integer, width=percent, wrap=string(soft|hard) - :Checkbox - :Fileupload - """ + + class KnoraError(Exception): """Handles errors happening in this file""" @@ -86,7 +86,7 @@ def __init__(self, message): self.message = message -class knora: +class Knora: """ This is the main class which holds all the methods for communication with the Knora backend. """ @@ -603,7 +603,6 @@ def create_property( # labels = list(map(lambda p: {"@language": p[0], "@value": p[1]}, labels.items())) - if not comments: comments = {"en": "none"} @@ -765,7 +764,6 @@ def create_list_node(self, # labels = list(map(lambda p: {"language": p[0], "value": p[1]}, labels.items())) - listnode = { "projectIri": project_iri, "labels": labels, @@ -790,7 +788,6 @@ def create_list_node(self, jsondata = json.dumps(listnode, indent=3, separators=(',', ': ')) - req = requests.post(url, headers={'Content-Type': 'application/json; charset=UTF-8', 'Authorization': 'Bearer ' + self.token}, @@ -1066,7 +1063,7 @@ def create_valdict(val): } jsonstr = json.dumps(jsondata, indent=3, separators=(',', ': ')) - print(jsonstr) + # print(jsonstr) url = self.server + "/v2/resources" req = requests.post(url, headers={'Content-Type': 'application/json; charset=UTF-8', @@ -1082,7 +1079,6 @@ def create_valdict(val): 'vark': res['knora-api:versionArkUrl']['@value'] } - def list_creator(self, children: List): """ internal Helper function @@ -1108,7 +1104,7 @@ def create_schema(self, shortcode: str, shortname: str): :return: Dict with a simple description of the ontology """ turtle = self.get_ontology_graph(shortcode, shortname) - print(turtle) + # print(turtle) g = Graph() g.parse(format='n3', data=turtle) @@ -1178,7 +1174,7 @@ def create_schema(self, shortcode: str, shortname: str): # process superprop (there might be multiple superprops) if superprop not in resources[resclass][propcnt]["superprop"]: resources[resclass][propcnt]["superprop"].append(superprop) - pprint.pprint(resources[resclass]) + # pprint.pprint(resources[resclass]) propcnt += 1 continue else: @@ -1198,7 +1194,7 @@ def create_schema(self, shortcode: str, shortname: str): "card": card, "cardval": row.cardval.toPython() }) - pprint.pprint(resources[resclass]) + # pprint.pprint(resources[resclass]) if superprop == "hasLinkTo": link_otypes.append(objtype) propindex[propname] = propcnt @@ -1268,6 +1264,7 @@ def __init__(self, schema: Dict): "knoraXmlImport": "http://api.knora.org/ontology/knoraXmlImport/v1#" } self.root = etree.Element('{http://api.knora.org/ontology/knoraXmlImport/v1#}resources', nsmap=self.xml_prefixes) + self.project_shortcode = schema["shortcode"] def new_xml_element(self, tag: str, options: Dict = None, value: str = None): tagp = tag.split(':') @@ -1294,10 +1291,25 @@ def get_xml_string(self): This method returns the Bulk-Import XML as an UTF-8 encoded string. :return: UTF-8 encoded string. """ - string = etree.tostring(self.root, pretty_print=True, xml_declaration=True, encoding='utf-8').encode("utf-8") + string = etree.tostring(self.root, pretty_print=True, xml_declaration=True, encoding='utf-8') return string - def upload_xml(self, ): + def upload(self, user, password, hostname, port): + """ + Upload the Bulk-Import XML to the server. + :param user: the email of the user + :param password: the password of the user + :param hostname: the hostname (e.g., localhost, api.example.org, etc.) + :param port: the port where the API is running (e.g., 3333) + :return: the JSON response + """ + project_iri = "http://rdfh.ch/projects/" + self.project_shortcode + url_encoded_project_iri = urllib.parse.quote_plus(project_iri) + bulkimport_api_url = "http://" + hostname + ":" + port + "/v1/resources/xmlimport/" + url_encoded_project_iri + headers = {"Content-Type": "application/xml"} + r = requests.post(bulkimport_api_url, data=self.get_xml_string(), headers=headers, auth=(user, password)) + return r.json() + def add_resource(self, resclass: str, id: str, label: str, properties: Dict): """ @@ -1325,7 +1337,6 @@ def find_list_node_id(nodename: str, nodes: List): return node_id return None - def process_properties(propinfo: Dict, valuestr: any): """ Processes a property in order to generate the approptiate XML for V1 bulk import. @@ -1371,9 +1382,9 @@ def process_properties(propinfo: Dict, valuestr: any): if self.schema["lists"][listname]["id"] == list_id: nodes = self.schema["lists"][listname]["nodes"] value = find_list_node_id(str(valuestr), nodes) - if value == 'http://rdfh.ch/lists/0808/X6bb-JerQyu5ULruCGEO0w': - print("BANG!") - exit(0) + # if value == 'http://rdfh.ch/lists/0808/X6bb-JerQyu5ULruCGEO0w': + # print("BANG!") + # exit(0) elif propinfo["otype"] == 'DateValue': # processing and validating date format res = re.match('(GREGORIAN:|JULIAN:)?(\d{4})?(-\d{1,2})?(-\d{1,2})?(:\d{4})?(-\d{1,2})?(-\d{1,2})?', str(valuestr)) @@ -1460,3 +1471,70 @@ def process_properties(propinfo: Dict, valuestr: any): self.root.append(resnode) +class IrisLookup: + def __init__(self, local_id_to_iri_json): + self.iris = local_id_to_iri_json + + def get_resource_iri(self, local_id): + """ + Given the result of the bulk-import as json, allow retrieving the resource + IRI based on the local ID. + {'createdResources': [{'clientResourceID': 'LM_1', + 'label': '1', + 'resourceIri': 'http://rdfh.ch/0807/rNxoIK-oR_i0-lO21Y9-CQ'}, + {'clientResourceID': 'LM_2']} + + :param local_id: the local id. resulting JSON from a bulk import upload. + :return: + """ + + try: + resources = self.iris["createdResources"] + iri = "" + for resource in resources: + try: + res_id = resource["clientResourceID"] + if res_id == local_id: + iri = resource["resourceIri"] + else: + pass + except KeyError: + pass + + if iri == "": + return None + else: + return iri + except KeyError: + print("IrisLookup.get_resource_iri - 'createdResources' not found") + + def get_iris_json(self): + return self.iris + + +class ListsLookup: + def __init__(self, lists_json): + self.lists = lists_json + + def get_list_iri(self, listname): + return self.lists[listname]["id"] + + def get_list_node_iri(self, listname, nodename): + if nodename is not None: + nodes = self.lists[listname]["nodes"] + res = "" + for node in nodes: + try: + res = node[nodename]["id"] + except KeyError: + pass + + if res == "": + return None + else: + return res + else: + return None + + def get_lists_json(self): + return self.lists diff --git a/setup.py b/setup.py index fb36ed714..5d094fe29 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,8 @@ name='knora', version='1.1.0', description='A Python library and tools for the Knora-API', + long_description=long_description, + long_description_content_type="text/markdown", url='https://github.com/dhlab-basel/knora-py', author='Lukas Rosenthaler', author_email='lukas.rosenthaler@unibas.ch',