/
linear_ucb.py
72 lines (58 loc) · 1.97 KB
/
linear_ucb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
from .linear_icf import LinearICF
class LinearUCB(LinearICF):
    """LinearUCB.

    An adaptation of the original LinUCB (Lihong Li et al. 2010) to measure
    the latent dimensions by a PMF formulation [1]_.

    References
    ----------
    .. [1] Zhao, Xiaoxue, Weinan Zhang, and Jun Wang. "Interactive collaborative filtering."
       Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013.
    """

    def __init__(self, alpha, zeta=None, *args, **kwargs):
        """Configure the exploration weight and initialise the base model.

        Args:
            alpha: Multiplier applied to the uncertainty term in
                :meth:`actions_estimate`. Takes precedence over ``zeta``
                whenever it is not ``None``.
            zeta: Used only when ``alpha`` is ``None``; the weight is then
                derived as ``1 + sqrt(log(2 / zeta) / 2)``.
            *args: Forwarded unchanged to ``LinearICF.__init__``.
            **kwargs: Forwarded unchanged to ``LinearICF.__init__``.

        Note:
            When both ``alpha`` and ``zeta`` are ``None`` this constructor
            does not assign ``self.alpha``.
        """
        super().__init__(*args, **kwargs)
        # Identity checks: `!= None` would be evaluated element-wise (and then
        # fail in the `if`) should an array-like value ever be passed.
        if alpha is not None:
            self.alpha = alpha
        elif zeta is not None:
            self.alpha = 1 + np.sqrt(np.log(2 / zeta) / 2)

    def reset(self, observation):
        """Reset the model by delegating to the base class.

        Args:
            observation: Passed straight to ``LinearICF.reset`` (the original
                code names it the training dataset).
        """
        super().reset(observation)

    def actions_estimate(self, candidate_actions):
        """Score candidate items with an upper-confidence-bound rule.

        Args:
            candidate_actions: ``(user id, candidate_items)``.

        Returns:
            tuple: ``(items_score, None)`` where ``items_score`` is the
            estimated mean score of each candidate plus ``alpha`` times the
            square root of its quadratic form under the user's covariance.
        """
        uid = candidate_actions[0]
        candidate_items = candidate_actions[1]

        # self.As / self.bs / self.var / self.items_means are not defined in
        # this class; presumably they are maintained by LinearICF.
        # Invert the user's matrix once and reuse it for mean and covariance.
        A_inv = np.linalg.inv(self.As[uid])
        mean = np.dot(A_inv, self.bs[uid])
        cov = A_inv * self.var

        # Look the candidate rows up a single time; they feed both terms.
        candidates_means = self.items_means[candidate_items]

        exploitation = mean @ candidates_means.T
        # Row-wise x^T * cov * x for every candidate row x.
        exploration = np.sqrt(
            np.sum(candidates_means.dot(cov) * candidates_means, axis=1)
        )
        items_score = exploitation + self.alpha * exploration
        return items_score, None

    def update(self, observation, action, reward, info):
        """Forward the observed interaction to the base-class update.

        Args:
            observation: Forwarded unchanged.
            action: ``(user id, item)``.
            reward (float): reward
            info: Forwarded unchanged.

        Returns:
            Whatever ``LinearICF.update`` returns.
        """
        return super().update(observation, action, reward, info)