-
Notifications
You must be signed in to change notification settings - Fork 2
/
misc_stats.py
61 lines (45 loc) · 1.96 KB
/
misc_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import json
import os
import numpy as np
import re
def get_entity_count(folder_path):
    """Count how often each entity label occurs across all files in a folder.

    Every entry of ``folder_path`` is opened and parsed as JSON. The code
    walks ``data['sessions'][*]['speeches'][*]['content'][*]['entities'][*]``
    and tallies the ``'label'`` value of each entity.

    Args:
        folder_path: Directory whose entries are all JSON documents with the
            nesting described above.

    Returns:
        A dict mapping each entity label to the number of times it was seen
        (the first occurrence counts as 1).

    Raises:
        KeyError: If a document lacks one of the keys named above.
        OSError: If the folder or one of its entries cannot be read.
        json.JSONDecodeError: If an entry is not valid JSON.
    """
    entity_count = {}
    for filename in os.listdir(folder_path):
        # Progress output, one line per processed file.
        print(filename)
        with open(os.path.join(folder_path, filename), encoding="utf-8") as json_file:
            data = json.load(json_file)
        for session in data['sessions']:
            for speech in session['speeches']:
                for content in speech['content']:
                    for entity in content['entities']:
                        label = entity['label']
                        # Start at 1 on first sight; starting at 0 would
                        # under-report every label by one.
                        entity_count[label] = entity_count.get(label, 0) + 1
    return entity_count
def get_speaker_entity_count(folder_path):
    """Count entity labels per speaker across all files in a folder.

    Every entry of ``folder_path`` is opened and parsed as JSON. For each
    speech in ``data['sessions'][*]['speeches']`` the ``'speaker'`` value is
    used as the outer key, and the ``'label'`` of every entity found under
    ``speech['content'][*]['entities']`` is tallied for that speaker.

    Args:
        folder_path: Directory whose entries are all JSON documents with the
            nesting described above.

    Returns:
        A dict mapping speaker -> {entity label -> occurrence count}. A
        speaker whose speeches contain no entities maps to an empty dict.

    Raises:
        KeyError: If a document lacks one of the keys named above.
        OSError: If the folder or one of its entries cannot be read.
        json.JSONDecodeError: If an entry is not valid JSON.
    """
    speaker_entity_count = {}
    for filename in os.listdir(folder_path):
        # Progress output, one line per processed file.
        print(filename)
        with open(os.path.join(folder_path, filename), encoding="utf-8") as json_file:
            data = json.load(json_file)
        for session in data['sessions']:
            for speech in session['speeches']:
                # Register the speaker even if no entity follows, so every
                # speaker seen in the data appears in the result.
                label_counts = speaker_entity_count.setdefault(speech['speaker'], {})
                for content in speech['content']:
                    for entity in content['entities']:
                        label = entity['label']
                        # Start at 1 on first sight; starting at 0 would
                        # under-report every label by one.
                        label_counts[label] = label_counts.get(label, 0) + 1
    return speaker_entity_count