/
iaa.py
executable file
·81 lines (72 loc) · 2.22 KB
/
iaa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
from collections import defaultdict
import csv
import numpy as np
import sys
from sklearn import metrics
# Inclusive (first_id, last_id) windows of item ids over which agreement is
# computed: twelve windows of 30 consecutive ids, the first starting at id 121
# and each following one starting 120 ids later (121-150, 241-270, ...,
# 1441-1470).
RANGES = tuple(
    (first, first + 29)
    for first in range(121, 1441 + 1, 120)
)
# Numeric code assigned to each annotation label.  The code is the label's
# position in the tuple below, so 'Ineligible' is 0 and
# 'Eligible (unconditionally)' is 3.
value_map = {
    label: code
    for code, label in enumerate((
        'Ineligible',
        'Indeterminate',
        'Eligible (conditionally)',
        'Eligible (unconditionally)',
    ))
}
def _load_annotations(path):
    """Read the annotation CSV at *path*.

    The first row is the header; the category names are the header cells
    between the first two columns and the last column.  In every following
    row the first cell is parsed as an integer item id and the cells under
    the category columns are translated through ``value_map``.  Cells whose
    text is not a key of ``value_map`` are skipped.

    Returns ``(categories, data)`` where *data* maps each category name to a
    ``defaultdict(list)`` of ``item id -> [numeric label, ...]`` in row order.

    Raises ``ValueError`` if the file has no header row.
    """
    # newline='' is what the csv module asks for so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(path, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s: empty CSV file (no header row)" % path)
        categories = header[2:-1]
        data = {category: defaultdict(list) for category in categories}
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to record.
                continue
            item_id = int(row[0])
            for category, label in zip(categories, row[2:-1]):
                code = value_map.get(label)
                if code is not None:
                    data[category][item_id].append(code)
    return categories, data


def _paired_labels(annotations, ids):
    """Return ``(y1, y2)`` label lists for the ids having exactly two labels.

    *annotations* maps item id to its list of numeric labels.  Ids in *ids*
    with any other number of labels (including none) are left out, so both
    returned lists always have the same length.
    """
    # .get() instead of indexing: indexing a defaultdict would insert an
    # empty list for every id that is merely probed.
    pairs = [
        annotations[item_id]
        for item_id in ids
        if len(annotations.get(item_id, ())) == 2
    ]
    y1 = [pair[0] for pair in pairs]
    y2 = [pair[1] for pair in pairs]
    return y1, y2


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: %s ANNOTATIONS.csv" % sys.argv[0])

    # populate data
    categories, data = _load_annotations(sys.argv[1])

    # split and calculate: per category, one report per id window in RANGES,
    # followed by the mean kappa over the windows that had any paired items.
    for category in categories:
        annotations = data[category]
        kappas = []
        for first, last in RANGES:
            r = range(first, last + 1)  # RANGES bounds are inclusive
            y1, y2 = _paired_labels(annotations, r)
            if not y1:
                continue
            # NOTE(review): if kappa can come back as nan for a window, the
            # category mean below becomes nan too -- confirm that is intended.
            ck = metrics.cohen_kappa_score(y1, y2)
            print("%s %s y1: %s" % (category, r, y1))
            print("%s %s y2: %s" % (category, r, y2))
            print("%s %s confusion matrix:" % (category, r))
            print(metrics.confusion_matrix(y1, y2))
            print("%s %s count: %d" % (category, r, len(y1)))
            print("%s %s accuracy: %f" % (category, r, metrics.accuracy_score(y1, y2)))
            print("%s %s kappa: %f" % (category, r, ck))
            print()
            kappas.append(ck)
        # np.mean([]) yields nan but also emits a RuntimeWarning; report the
        # same nan without the warning when no window had paired items.
        mean_kappa = np.mean(kappas) if kappas else float('nan')
        print()
        print("%s mean: %f" % (category, mean_kappa))
        print()