-
Notifications
You must be signed in to change notification settings - Fork 0
/
QuantifyConsensusK1_CLEFeHealth.py
258 lines (188 loc) · 9.64 KB
/
QuantifyConsensusK1_CLEFeHealth.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# -*- coding: utf-8 -*-
"""
Created in August 2019
@author: Zhengui
"""
import numpy as np
import matplotlib.pyplot as plt
## -- Function to read all the rankings of the 12 teams for the 66 queries --
def extractRankings(input_file, topN, output_file, skip_queries=('62',)):
    """Extract the top-N ranked items of every query from a run file.

    Every non-blank line of ``input_file`` is split on whitespace.  The first
    field must look like ``<prefix>.<query id>`` (e.g. ``qtest.1``) and the
    third field is the ranked item.  Consecutive lines that share the same
    query id form one ranking, in file order.

    A ranking is kept only once it reaches ``topN`` items; queries with fewer
    lines are dropped and items beyond the first ``topN`` are ignored.

    Parameters
    ----------
    input_file : str
        Path of the run file to read.
    topN : int
        Number of leading items to keep for each query.
    output_file : str
        Path of the file to write; one kept ranking per line, items separated
        by a single space.
    skip_queries : iterable of str, optional
        Query ids whose lines are ignored.  Defaults to ``('62',)`` because
        some teams gave a ranking for query 62.

    Returns
    -------
    list of list of str
        The kept rankings, in the order their queries appear in the file.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than three fields or its first field
        contains no ``'.'``.
    """
    rank_list = []
    previous_query = ''
    curr_rank = []

    with open(input_file, 'r') as fr:
        for line in fr:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines

            # Split e.g. 'qtest.1' into 'qtest' and '1'; ids are compared as
            # strings, so the skip list must contain strings as well.
            query = fields[0].split('.')[1]
            if query in skip_queries:
                continue

            if query != previous_query:
                # First line of a new query: start a fresh ranking.
                previous_query = query
                curr_rank = []

            if len(curr_rank) < topN:
                curr_rank.append(fields[2])
                # Record the ranking at the moment it becomes complete; this
                # also covers topN == 1, where the first line completes it.
                if len(curr_rank) == topN:
                    rank_list.append(curr_rank)

    # Save results
    with open(output_file, 'w') as fw:
        fw.writelines(' '.join(rank) + '\n' for rank in rank_list)

    return rank_list
## ---------------------------------------------------------------------------
## -------------------------- function to calculate k_1 --------------------------
def _first_index(ranking, item):
    """Return the 0-based index of the first occurrence of item, or -1 if absent."""
    try:
        return ranking.index(item)
    except ValueError:
        return -1


def CalK1(gamma, q_ratio, rankings, N1):
    """Compute the k_1 consensus value of every ranking in a ranking set.

    The support threshold is ``q = len(rankings) / q_ratio``.  Rankings are
    processed in order.  For each item of ranking ``i`` a weight is derived:

    * If the item already occurs in an earlier ranking, the decision taken
      there is reused: a non-zero earlier weight means the stored mean
      position is reused and the weight is
      ``gamma ** |position in ranking i - mean position|``; otherwise the
      weight stays 0.
    * Otherwise the item is counted over rankings ``i, i+1, ...``.  If it
      occurs in at least ``q`` of them, its mean 1-based position over those
      rankings is stored and the weight is
      ``gamma ** |position in ranking i - mean position|``; otherwise 0.

    ``K1[i]`` is the sum of the weights of ranking ``i`` divided by ``N1``.

    Only the diagonal of the per-ranking weight matrix is ever used, so the
    diagonals are stored as 1-D arrays.

    Parameters
    ----------
    gamma : float
        Base of the penalty applied to the deviation from the mean position.
    q_ratio : float
        Divisor turning the number of rankings into the support threshold q.
    rankings : sequence of list
        The rankings to compare; every ranking must provide ``.index``.
    N1 : float
        Normalising constant (the caller passes the ranking length).

    Returns
    -------
    numpy.ndarray
        One k_1 value per ranking, in input order.
    """
    num_experts = len(rankings)     # number of rankings in R
    q = num_experts / q_ratio       # q-support threshold
    K1 = np.zeros(num_experts)      # k_1 of every ranking
    weights_R = []                  # per ranking: weight of each of its items
    pos_mean_R = []                 # per ranking: mean position of each item

    for i, ranking_i in enumerate(rankings):
        weights = np.zeros(len(ranking_i))
        pos_mean = np.zeros(len(ranking_i))

        for index_j, item in enumerate(ranking_i):
            position_here = index_j + 1  # positions are 1-based

            if num_experts - i >= q:
                # Enough rankings remain (this one included) for a new item
                # to still reach q-support, so count its occurrences.
                support = 0
                pos = np.zeros(num_experts)  # position of item per ranking
                seen_before = False

                for i2 in range(num_experts):
                    idx = _first_index(rankings[i2], item)
                    if idx >= 0:
                        if i2 < i:
                            # Already decided in an earlier ranking: reuse it.
                            seen_before = True
                            if weights_R[i2][idx] != 0:
                                pos_mean[index_j] = pos_mean_R[i2][idx]
                                pos_var = np.abs(position_here - pos_mean[index_j])
                                weights[index_j] = np.power(gamma, pos_var)
                            break
                        support += 1
                        # Store a plain scalar; assigning a 1-element array
                        # into a scalar slot is deprecated by NumPy.
                        pos[i2] = idx + 1
                    # Stop as soon as the remaining rankings cannot lift the
                    # count up to q any more.
                    if support + num_experts - (i2 + 1) < q:
                        break

                if not seen_before and support >= q:
                    pos_mean[index_j] = np.sum(pos) / support
                    pos_var = np.abs(pos[i] - pos_mean[index_j])
                    weights[index_j] = np.power(gamma, pos_var)
            else:
                # Fewer than q rankings remain, so a new item cannot become
                # q-supported; only the first N-q+1 rankings can hold an
                # earlier positive decision for it.
                for i2 in range(int(num_experts - q + 1)):
                    idx = _first_index(rankings[i2], item)
                    if idx >= 0 and weights_R[i2][idx] != 0:
                        pos_mean[index_j] = pos_mean_R[i2][idx]
                        pos_var = np.abs(position_here - pos_mean[index_j])
                        weights[index_j] = np.power(gamma, pos_var)
                        break

        K1[i] = np.sum(weights) / N1
        weights_R.append(weights)
        pos_mean_R.append(pos_mean)

    return K1
## --------------------------------------------------------------------------
## ---------------------------------- MAIN -----------------------------
if __name__ == '__main__':
    import os  # local import: only the script entry point builds paths

    print('Starting...')

    ## --- Read all the rankings of the 12 teams for the 66 queries ---
    INPUT_FILES = ['CUNI_EN_Run.1.dat', 'ECNU_EN_Run.1.dat', 'FDUSGInfo_EN_Run.1.dat', 'GRIUM_EN_Run.1.dat', 'KISTI_EN_RUN.1.dat', 'KUCS_EN_Run.1.dat', 'LIMSI_EN_run.1.dat', 'Miracl_EN_Run.1.dat', 'TeamHCMUS_EN_Run.1.dat', 'UBML_EN_Run.1.dat', 'USST_EN_Run.1.dat', 'YorkU_EN_Run.1.dat']
    OUTPUT_FILES = ['TOP20_CUNI_EN.dat', 'TOP20_ECNU_EN.dat', 'TOP20_FDUSGInfo_EN.dat', 'TOP20_GRIUM_EN.dat', 'TOP20_KISTI_EN.dat', 'TOP20_KUCS_EN.dat', 'TOP20_LIMSI_EN.dat', 'TOP20_Miracl_EN.dat', 'TOP20_TeamHCMUS_EN.dat', 'TOP20_UBML_EN.dat', 'TOP20_USST_EN.dat', 'TOP20_YorkU_EN.dat']

    # Care about the topN items.  Defined before the loop because it is also
    # needed afterwards as the normalising constant N1.
    topN = 20

    rankings_all = []  # one entry per team: that team's rankings for all queries
    for input_name, output_file in zip(INPUT_FILES, OUTPUT_FILES):
        # os.path.join keeps the 'Run1' sub-directory usable on every OS.
        input_file = os.path.join('Run1', input_name)
        rank_list = extractRankings(input_file, topN, output_file)
        rankings_all.append(rank_list)

    ## ------------- Fixed gamma, q_ratio -------------
    gamma = 0.9
    q_ratio = 2  # no. of rankings / q_ratio = q-support
    N1 = topN
    num_experts = len(rankings_all)  # 12 teams

    # Evaluate the ranking set of every query j (j = 0, ..., 65).
    K1 = []
    K1_mean = []
    K1_varUp = []
    K1_varDown = []
    for j in range(len(rankings_all[0])):
        # Rankings of all the teams for query j.
        R_j = [team_rankings[j] for team_rankings in rankings_all]
        K1_j = CalK1(gamma, q_ratio, R_j, N1)
        K1_j_mean = np.mean(K1_j)
        K1.append(K1_j)
        K1_mean.append(K1_j_mean)
        K1_varUp.append(np.max(K1_j) - K1_j_mean)    # distance from mean up to max
        K1_varDown.append(K1_j_mean - np.min(K1_j))  # distance from mean down to min

    # Query numbers on the x axis; query 62 is not part of the extracted data.
    query_list = [x for x in range(1, 68) if x != 62]

    plt.figure()
    plt.bar(query_list, K1_mean)
    # Pass the boolean: the string 'False' is non-empty and therefore truthy.
    plt.grid(False)
    plt.xlim([0, 68])
    plt.xlabel('Query')
    plt.ylabel(r'$\bar{\kappa}_1(6)$')
    #plt.ylabel(r'$\bar{\kappa}_1(\lceil\frac{N}{2}\rceil)$')
    plt.savefig("K1meanCLEF.png")
    plt.show()

    # Sorted mean values and the matching indices into K1_mean, kept for
    # inspecting the queries with the lowest / highest consensus.
    temp1 = np.sort(K1_mean)
    temp2 = np.argsort(K1_mean)

    # ------------------ values recorded for gamma = 0.9 ---------------
    # Highest means:
    #   K1_mean[57] (query 58) = 0.472145105609275
    #   K1_mean[24]            = 0.44710891712542516
    #   K1_mean[23]            = 0.41779620456906547
    #   K1_mean[54]            = 0.4042048125135694
    # Lowest means:
    #   K1_mean[62] (query 64) = 0.026077969900242054
    #   K1_mean[47]            = 0.03583320112287755
    #   K1_mean[10]            = 0.08654263660739923
    #   K1_mean[32]            = 0.10816580872128433