/
glm_ucb.py
189 lines (154 loc) · 6.36 KB
/
glm_ucb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
from .linear_icf import LinearICF
import numpy as np
import scipy.optimize
from collections import defaultdict
import scipy
from .most_popular import MostPopular
from .entropy import Entropy
from .log_pop_ent import LogPopEnt
class GLM_UCB(LinearICF):
    """Generalized Linear Model Bandit-Upper Confidence Bound.

    It follows a similar process as Linear UCB based on the PMF formulation, but it also
    adds a sigmoid form in the exploitation step and makes a time-dependent exploration [1]_.

    References
    ----------
    .. [1] Zhao, Xiaoxue, Weinan Zhang, and Jun Wang. "Interactive collaborative filtering."
    Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013.
    """

    def __init__(self, num_lat, c=1.0, *args, **kwargs):
        """__init__.

        Args:
            num_lat (int): number of latent factors
            c (float): exploration weight multiplying the time-dependent
                confidence term in ``actions_estimate``
            args: forwarded to LinearICF
            kwargs: forwarded to LinearICF
        """
        super().__init__(num_lat=num_lat, *args, **kwargs)
        self.c = c
        self.num_lat = num_lat

    def sigmoid(self, x):
        """Logistic sigmoid, 1 / (1 + e^-x)."""
        return 1 / (1 + np.exp(-x))

    def p(self, x):
        """Link function applied to predicted scores.

        Identity here; presumably meant to be overridden (e.g. with
        ``self.sigmoid``) for a non-linear GLM link — confirm with callers.
        """
        return x

    def reset(self, observation):
        """reset.

        Args:
            observation: training dataset, forwarded to the parent reset
        """
        train_dataset = observation
        super().reset(train_dataset)
        # Per-user history of observed rewards and of the latent-factor
        # means of the items recommended to that user; consumed by the
        # root-finding refit in actions_estimate.
        self.users_rec_rewards = defaultdict(list)
        self.users_rec_items_means = defaultdict(list)
        # Last fitted weight vector per user (warm start for the next fit).
        self.p_vals = dict()
        # exp(-x) in sigmoid may underflow for large x; that is harmless.
        np.seterr(under="ignore")
        self.recent_predict = True
        self.t = 0

    def error_user_weight_function(self, p, u_rec_rewards, u_rec_items_means):
        """GLM score-equation residual for user weights ``p``.

        Returns sum_t (r_t - p(p^T x_t)) * x_t; a root of this function is
        the maximum-likelihood user weight vector for the observed history.
        """
        return np.sum(
            np.array([(u_rec_rewards[t] - self.p(p.T @ u_rec_items_means[t])) *
                      u_rec_items_means[t]
                      for t in range(0, len(u_rec_items_means))]), 0)

    def actions_estimate(self, candidate_actions):
        """actions_estimate.

        Args:
            candidate_actions: (user id, candidate_items)

        Returns:
            tuple: (numpy.ndarray of per-item UCB scores, None)
        """
        uid = candidate_actions[0]
        candidate_items = candidate_actions[1]
        A = self.As[uid]
        if len(self.users_rec_items_means[uid]) == 0:
            # No interaction history yet: fall back to the prior weights.
            self.p_vals[uid] = self.bs[uid]
        else:
            # Refit the user weights by solving the score equations,
            # warm-started from the previous solution. Fall back to the
            # prior when this user gained history (via update) before
            # ever being scored, so p_vals has no entry yet.
            initial_guess = self.p_vals.get(uid, self.bs[uid])
            self.p_vals[uid] = scipy.optimize.root(
                self.error_user_weight_function, initial_guess,
                (self.users_rec_rewards[uid],
                 self.users_rec_items_means[uid])).x
        cov = np.linalg.inv(A) * self.var
        # Exploitation term p(w^T x) plus a confidence bonus that grows
        # with log(t) and with the item's posterior variance under cov.
        items_score = self.p(self.p_vals[uid][None,:] @ self.items_means[candidate_items].T) +\
                      self.c * np.sqrt(np.log(self.t+1)) *\
                      np.sqrt(np.sum(self.items_means[candidate_items].dot(cov) *\
                                     self.items_means[candidate_items],axis=1))
        items_score = items_score.flatten()
        self.recent_predict = True
        return items_score, None

    def update(self, observation, action, reward, info):
        """update.

        Args:
            observation:
            action: (user id, item)
            reward (float): reward
            info: unused
        """
        uid = action[0]
        item = action[1]
        max_item_mean = self.items_means[item, :]
        self.users_rec_rewards[uid].append(reward)
        self.users_rec_items_means[uid].append(max_item_mean)
        # Rank-one update of the user's design matrix A += x x^T.
        self.As[uid] += max_item_mean[:, None].dot(max_item_mean[None, :])
        # Advance t once per predict/update cycle, not once per update,
        # so batched updates after a single prediction count as one step.
        if self.recent_predict:
            self.t += 1
            self.recent_predict = False
class GLM_UCBInit(GLM_UCB):
    """GLM_UCB whose prior user weights are fitted to a chosen item-bias signal."""

    def __init__(self, init, *args, **kwargs):
        """__init__.

        Args:
            init (str): item-bias signal used to fit the initial user
                weights; one of 'entropy', 'popularity', 'logpopent',
                'rand_popularity', 'random'
            args: forwarded to GLM_UCB
            kwargs: forwarded to GLM_UCB
        """
        super().__init__(*args, **kwargs)
        self.init = init

    def reset(self, observation):
        """Reset state, then fit ``initial_b`` so its projection onto the
        item latent factors approximates the configured item-bias signal.

        Raises:
            ValueError: if ``self.init`` is not a recognized method name.
        """
        train_dataset = observation
        super().reset(train_dataset)
        if self.init == 'entropy':
            items_entropy = Entropy.get_items_entropy(
                self.train_consumption_matrix)
            self.items_bias = items_entropy
        elif self.init == 'popularity':
            items_popularity = MostPopular.get_items_popularity(
                self.train_consumption_matrix, normalize=False)
            self.items_bias = items_popularity
        elif self.init == 'logpopent':
            items_entropy = Entropy.get_items_entropy(
                self.train_consumption_matrix)
            items_popularity = MostPopular.get_items_popularity(
                self.train_consumption_matrix, normalize=False)
            self.items_bias = LogPopEnt.get_items_logpopent(
                items_popularity, items_entropy)
        elif self.init == 'rand_popularity':
            items_popularity = MostPopular.get_items_popularity(
                self.train_consumption_matrix, normalize=False)
            # Keep only the 100 most popular items; zero out the rest.
            items_popularity[np.argsort(items_popularity)[::-1][100:]] = 0
            self.items_bias = items_popularity
        elif self.init == 'random':
            self.items_bias = np.random.rand(
                self.train_dataset.num_total_items)
        else:
            # Previously an unknown init fell through silently and failed
            # later with an AttributeError on items_bias; fail fast instead.
            raise ValueError("unknown init method: %r" % (self.init,))

        # Normalize the bias signal to the [0, 1] range.
        self.items_bias = self.items_bias - np.min(self.items_bias)
        self.items_bias = self.items_bias / np.max(self.items_bias)
        assert (self.items_bias.min() >= 0
                and np.isclose(self.items_bias.max(), 1))

        # Least-squares fit: find weights x whose projection onto the item
        # latent factors best reproduces the bias signal.
        res = scipy.optimize.minimize(
            lambda x, items_means, items_bias: np.sum(
                (items_bias - x @ items_means.T)**2),
            np.ones(self.num_latent_factors),
            args=(self.items_means, self.items_bias),
            method='BFGS',
        )
        self.initial_b = res.x
        # Diagnostic output: correlation between the target bias and its
        # reconstruction from the fitted prior weights.
        print(
            np.corrcoef(self.items_bias,
                        self.initial_b @ self.items_means.T)[0, 1])
        # Every user starts from a copy of the fitted prior.
        self.bs = defaultdict(lambda: self.initial_b.copy())
class GLM_UCBEntropy(GLM_UCBInit):
    """GLM_UCBInit preset using the 'entropy' item-bias initialization."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments with init fixed to 'entropy'."""
        super().__init__('entropy', *args, **kwargs)
class GLM_UCBPopularity(GLM_UCBInit):
    """GLM_UCBInit preset using the 'popularity' item-bias initialization."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments with init fixed to 'popularity'."""
        super().__init__('popularity', *args, **kwargs)
class GLM_UCBRandPopularity(GLM_UCBInit):
    """GLM_UCBInit preset using the 'rand_popularity' item-bias initialization."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments with init fixed to 'rand_popularity'."""
        super().__init__('rand_popularity', *args, **kwargs)
class GLM_UCBRandom(GLM_UCBInit):
    """GLM_UCBInit preset using the 'random' item-bias initialization."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments with init fixed to 'random'."""
        super().__init__('random', *args, **kwargs)
class GLM_UCBLogPopEnt(GLM_UCBInit):
    """GLM_UCBInit preset using the 'logpopent' item-bias initialization."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments with init fixed to 'logpopent'."""
        super().__init__('logpopent', *args, **kwargs)