Skip to content

Commit

Permalink
Modify arrange() to handle multiple sentences
Browse files Browse the repository at this point in the history
  • Loading branch information
boat-builder committed Aug 12, 2016
1 parent 61e9438 commit e38526d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
9 changes: 5 additions & 4 deletions corenlp_pywrap/example.py
@@ -1,5 +1,6 @@
# Example usage of corenlp_pywrap: send text to a running CoreNLP server
# and inspect the token-level result returned by arrange().
import pywrap as p

# Full annotator pipeline on a short input with two clauses.
client = p.CoreNLP()
result = client.arrange('This is Sherin, He is good but I am bad')
for field, values in result.items():
    print(field, values)

# Restrict the pipeline to the 'lemma' annotator and feed a multi-sentence
# text to exercise arrange() across sentence boundaries.
client = p.CoreNLP(annotator_list=['lemma'])
text = '''Well, that is it then. Zimbabwe have thrashed Afghanistan. For a long while, it looked like Afghanistan held all the aces, but they have hurtled towards a heavy defeat. Coming back to the wicket, Hamza tried to nudge this back of a length delivery to fine leg. However, he got a faint edge on it. Mutumbami showed superb reflexes to dive to his left and snaffle the catch. Players from both sides shake hands as they make their way back to the pavilion. Amir Hamza c Mutumbami b Luke Jongwe 1(12)Luke Jongwe to Amir Hamza, THAT'S OUT!! Caught!!'''
result = client.arrange(text)

# The per-token lists should stay aligned: one entry per token in each field.
print(len(result['index']))
print(len(result['word']))
23 changes: 14 additions & 9 deletions corenlp_pywrap/pywrap.py
Expand Up @@ -17,7 +17,6 @@ class CoreNLP:
"relation", "natlog", "quote"]
url = 'http://127.0.0.1:9000'
out_format = 'json'
sentences = []

def __init__(self, url=url, annotator_list=annotator_full_list):
assert url.upper().startswith('HTTP'), \
Expand Down Expand Up @@ -102,11 +101,10 @@ def regex(cls, endpoint, data, pattern, custom_filter):
return cls.server_connection(current_url, data)

@staticmethod
def process_sentences(sentence):
assert isinstance(sentence, list), 'it should be a list'
assert len(sentence) == 1, 'assuming the lenght is one'
sent_dict = sentence[0]
tokens = sent_dict['tokens']
def process_sentences(sentences):
assert isinstance(sentences, list), 'it should be a list'
index = 0
new_index = 0
token_dict = {
'index':[],
'truecaseText':[],
Expand All @@ -122,9 +120,16 @@ def process_sentences(sentence):
'word':[],
'after':[]
}
for val in tokens:
for key, val in val.items():
token_dict[key].append(val)
for sentence in sentences:
index = new_index
tokens = sentence['tokens']
for val in tokens:
for key, val in val.items():
if key == 'index':
new_index = index + int(val)
token_dict[key].append(str(new_index))
else:
token_dict[key].append(val)
return token_dict


Expand Down

0 comments on commit e38526d

Please sign in to comment.