import numpy as np


# Computes the predicted rating from a single embedding matrix U that stacks
# user rows on top of item rows (item j lives at row j + nrows), with a
# shared bias vector bias_u laid out the same way.
def predict(U, bias_u, Test, nrows):
    data = Test.data
    rows = Test.row
    cols = Test.col
    results = []
    for c in range(len(data)):
        # Score = user-item embedding dot product plus the two biases.
        s = np.dot(U[rows[c], :], U[cols[c] + nrows, :]) + bias_u[rows[c]] \
            + bias_u[cols[c] + nrows]
        results.append((rows[c], cols[c], data[c], s))
    return results


# Computes the predicted rating using separate user (U) and item (V)
# embeddings; the biases follow the same row layout as the embeddings.
def predictuv(U, V, bias_u, bias_v, Test, nrows):
    data = Test.data
    rows = Test.row
    cols = Test.col
    results = []
    for c in range(len(data)):
        s = np.dot(U[rows[c], :], V[cols[c] + nrows, :]) + bias_u[rows[c]] \
            + bias_v[cols[c] + nrows]
        results.append((rows[c], cols[c], data[c], s))
    return results
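

# Worked example (illustrative numbers, not taken from any real run): with
# 2-dimensional embeddings u_i = [0.5, 1.0] and v_j = [1.0, 2.0] and biases
# b_i = 0.2, b_j = 0.1, the predicted score is
#   s = 0.5*1.0 + 1.0*2.0 + 0.2 + 0.1 = 2.8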


# Precision@n: the fraction of a user's top-n recommendations (ranked by
# predicted score) whose true rating meets the relevance threshold,
# averaged over all users who rated at least n items.
def cal_precision(dicTopn, n, thr):
    def getkey(tp):
        return tp[1]  # Sort by predicted score
    num_good_user = 0.0
    Prec = 0.0
    for uid in dicTopn:
        z = dicTopn[uid]
        if len(z) < n:
            continue  # Skip users with fewer than n ratings
        x = [(z[mid]['t'], z[mid]['p']) for mid in z]
        x_sorted = sorted(x, key=getkey, reverse=True)
        sumP = 0.0
        num_good_user += 1.0
        for i in range(n):
            if x_sorted[i][0] >= thr:
                sumP += 1.0
        Prec += sumP / n
    if num_good_user < 1.0:
        print('no valid users, ERROR metric')
        return 0.0
    return Prec / num_good_user
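

# Worked example (illustrative): with n = 3 and thr = 4, if a user's top-3
# items by predicted score have true ratings (5, 4, 3), two of the three
# meet the threshold, so precision@3 = 2/3.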


# Recall@n: the number of relevant items among a user's top-n
# recommendations divided by that user's total number of relevant items.
# Note the denominator can exceed n, in which case recall@n cannot reach 1.
def cal_recall(dicTopn, n, thr):
    def getkey(tp):
        return tp[1]  # Sort by predicted score
    num_good_user = 0.0
    Rec = 0.0
    for uid in dicTopn:
        z = dicTopn[uid]
        if len(z) < n:
            continue  # Skip users with fewer than n ratings
        x = [(z[mid]['t'], z[mid]['p']) for mid in z]
        act_tot = 0.0
        for i in range(len(x)):
            if x[i][0] >= thr:
                act_tot += 1.0
        if act_tot < 1.0:
            continue  # Skip users with no relevant items in the ground truth
        x_sorted = sorted(x, key=getkey, reverse=True)
        sumP = 0.0
        num_good_user += 1.0
        for i in range(n):
            if x_sorted[i][0] >= thr:
                sumP += 1.0
        Rec += sumP / act_tot
    if num_good_user < 1.0:
        print('no valid users, ERROR metric')
        return 0.0
    return Rec / num_good_user
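

# Worked example (illustrative): with n = 3 and thr = 4, if a user rated six
# items of which four are relevant, and two of those four appear in the
# top-3 by predicted score, recall@3 = 2/4 = 0.5.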


# Average Precision (AP): the precision at each rank where a relevant item
# appears in the top-n recommendations, averaged over the smaller of n and
# the number of relevant items. MAP is the mean of AP over all users.
def cal_map(dicTopn, n, thr):
    def getkey(tp):
        return tp[1]  # Sort by predicted score
    MAP = 0.0
    num_good_user = 0.0
    for uid in dicTopn:
        z = dicTopn[uid]
        x = [(z[mid]['t'], z[mid]['p']) for mid in z]
        act_tot = 0.0
        for i in range(len(x)):
            if x[i][0] >= thr:
                act_tot += 1.0
        if act_tot < 1.0:
            continue  # Skip users with no relevant items in the ground truth
        x_sorted = sorted(x, key=getkey, reverse=True)
        sumP = 0.0
        ap = 0.0
        num_good_user += 1.0
        upper = min(n, len(x))
        for i in range(upper):
            if x_sorted[i][0] >= thr:
                sumP += 1.0
                # Accumulate precision@(i+1) only at relevant ranks.
                ap += sumP / (i + 1.0)
        MAP += ap / min(upper, act_tot)
    if num_good_user < 1.0:
        print('no valid users, ERROR metric')
        return 0.0
    return MAP / num_good_user
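

# Worked example (illustrative): with n = 5 and three relevant items in
# total, if the top-5 relevance pattern is (rel, miss, rel, rel, miss), then
#   ap = 1/1 + 2/3 + 3/4 = 2.4167
# and AP = 2.4167 / min(5, 3) = 0.806.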


# Normalized Discounted Cumulative Gain (NDCG): DCG of the predicted top-n
# ordering divided by the ideal DCG (IDCG) of the ordering by true rating,
# so NDCG = DCG/IDCG. NDCG@n applies the gain 2**r - 1 to each true rating
# r and discounts the item at rank i (1-indexed) by 1/log2(i + 1).
# thr is unused here; it is kept for a uniform metric signature.
def cal_ndcg(dicTopn, n, thr):
    def getkeydcg(tp):
        return tp[1]  # Predicted score
    def getkeyidcg(tp):
        return tp[0]  # True rating
    NDCG = 0.0
    num_good_user = 0.0
    for uid in dicTopn:
        z = dicTopn[uid]
        if len(z) < n:
            continue  # Skip users with fewer than n ratings
        x = [(z[mid]['t'], z[mid]['p']) for mid in z]
        sorted_x2 = sorted(x, key=getkeyidcg, reverse=True)
        idcg = 0.0
        for i in range(n):
            idcg += (2**sorted_x2[i][0] - 1) / np.log2(i + 2.0)
        if idcg == 0.0:
            continue  # Skip users whose top-n true ratings are all zero
        sorted_x1 = sorted(x, key=getkeydcg, reverse=True)
        dcg = 0.0
        num_good_user += 1.0
        for i in range(n):
            dcg += (2**sorted_x1[i][0] - 1) / np.log2(i + 2.0)
        NDCG += dcg / idcg
    if num_good_user < 1.0:
        print('no valid users, ERROR metric')
        return 0.0
    return NDCG / num_good_user
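

# Worked example (illustrative): with n = 2, if the predicted order yields
# true ratings (3, 5) while the ideal order is (5, 3):
#   DCG  = (2**3 - 1)/log2(2) + (2**5 - 1)/log2(3) ~= 7.00 + 19.56 = 26.56
#   IDCG = (2**5 - 1)/log2(2) + (2**3 - 1)/log2(3) ~= 31.00 + 4.42 = 35.42
# so NDCG@2 ~= 26.56/35.42 ~= 0.75.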


# Converts prediction tuples of the form
# (userId, movieId, actual_rating, predicted_score) -- e.g. as produced by
# predict()/predictuv(), or read back from a saved prediction score file --
# into a nested dict keyed by user id, then movie id.
def parsetuples(tuples):
    dic = {}
    for c in tuples:
        uid = c[0]
        mid = c[1]
        entry = {}
        entry['t'] = float(c[2])  # Actual rating
        entry['p'] = float(c[3])  # Predicted score
        if uid not in dic:
            dic[uid] = {}
        dic[uid][mid] = entry
    return dic


# Returns the outputs of all evaluation metrics for the given prediction
# tuples; thr is the minimum true rating counted as relevant.
def Calculate(tuples, n=10, thr=5):
    dicTopn = parsetuples(tuples)
    OutPrec = cal_precision(dicTopn, n, thr)
    OutRec = cal_recall(dicTopn, n, thr)
    OutMAP = cal_map(dicTopn, n, thr)
    OutNDCG = cal_ndcg(dicTopn, n, thr)
    return (OutPrec, OutRec, OutMAP, OutNDCG)
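

# A minimal smoke test with synthetic data (the ids, ratings, and scores
# below are illustrative, not from a real model), showing the expected
# input format end to end.
if __name__ == '__main__':
    demo = [
        # (userId, movieId, actual_rating, predicted_score)
        (0, 0, 5, 4.1), (0, 1, 3, 3.9), (0, 2, 4, 2.7),
        (0, 3, 2, 2.2), (0, 4, 5, 4.8), (0, 5, 1, 0.9),
        (1, 0, 4, 3.5), (1, 1, 5, 4.6), (1, 2, 2, 1.8),
        (1, 3, 5, 4.9), (1, 4, 3, 3.1), (1, 5, 4, 3.3),
    ]
    prec, rec, mean_ap, ndcg = Calculate(demo, n=3, thr=4)
    print('Precision@3: %.3f  Recall@3: %.3f  MAP@3: %.3f  NDCG@3: %.3f'
          % (prec, rec, mean_ap, ndcg))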