diff --git a/corenlp_pywrap/example.py b/corenlp_pywrap/example.py index ac22d74..a4e9acd 100644 --- a/corenlp_pywrap/example.py +++ b/corenlp_pywrap/example.py @@ -1,5 +1,7 @@ import pywrap as p -cn = p.CoreNLP(annotator_list = ['lemma']) +import logging +p.root.setLevel(logging.WARNING) +cn = p.CoreNLP() sent = '''Well, that is it then. Zimbabwe have thrashed Afghanistan. For a long while, it looked like Afghanistan held all the aces, but they have hurtled towards a heavy defeat. Coming back to the wicket, Hamza tried to nudge this back of a length delivery to fine leg. However, he got a faint edge on it. Mutumbami showed superb reflexes to dive to his left and snaffle the catch. Players from both sides shake hands as they make their way back to the pavilion. Amir Hamza c Mutumbami b Luke Jongwe 1(12)Luke Jongwe to Amir Hamza, THAT'S OUT!! Caught!!''' r = cn.arrange(sent) print(len(r['index'])) diff --git a/corenlp_pywrap/pywrap.py b/corenlp_pywrap/pywrap.py index eeda435..ded7aa5 100644 --- a/corenlp_pywrap/pywrap.py +++ b/corenlp_pywrap/pywrap.py @@ -1,10 +1,9 @@ import requests, logging, sys root = logging.getLogger('Root') -root.setLevel(logging.INFO) +root.setLevel(logging.WARNING) lhandler = logging.StreamHandler(sys.stdout) -lhandler.setLevel(logging.WARNING) formatter = logging.Formatter( '%(asctime)s [%(name)s]:%(levelname)s - %(message)s', '%Y-%m-%d %H:%M:%S') @@ -12,6 +11,7 @@ root.addHandler(lhandler) class CoreNLP: + root.debug('Object instantiating..') annotator_full_list = ["tokenize", "cleanxml", "ssplit", "pos", "lemma", "ner", "regexner", "truecase", "parse", "depparse", "dcoref", "relation", "natlog", "quote"] @@ -45,6 +45,7 @@ def __init__(self, url=url, annotator_list=annotator_full_list): @staticmethod def server_connection(current_url, data): + root.debug('server connection: ' + current_url) try: server_out = requests.post(current_url, data, @@ -80,7 +81,6 @@ def basic(self, data, out_format='json', serializer=''): current_url = self.url_calc(serializer) assert isinstance(data, str) and data, 'Enter valid string input' - root.debug('Trying: ' + current_url) return self.server_connection(current_url, data) @staticmethod @@ -118,7 +118,8 @@ def process_sentences(sentences): 'characterOffsetEnd':[], 'speaker':[], 'word':[], - 'after':[] + 'after':[], + 'normalizedNER':[] } for sentence in sentences: index = new_index @@ -129,7 +130,11 @@ def process_sentences(sentences): new_index = index + int(val) token_dict[key].append(str(new_index)) else: - token_dict[key].append(val) + try: + token_dict[key].append(val) + except KeyError: + token_dict[key] = [val] + root.info('key not found: ' + key) return token_dict