/
linear_ts.py
108 lines (88 loc) · 3.12 KB
/
linear_ts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from .linear_icf import LinearICF
import numpy as np
from numba import jit
@jit(nopython=True)
def _central_limit_theorem(k):
    """Standardize the sum of the entries of ``k``.

    The sum is shifted by ``len(k) / 2`` and divided by
    ``sqrt(len(k) / 12)``, which are the mean and standard deviation of a
    sum of ``len(k)`` independent Uniform(0, 1) draws. For such input the
    result is approximately standard normal (central limit theorem).

    Args:
        k: array of samples; the caller visible in this module passes
            Uniform(0, 1) draws.

    Returns:
        float: the standardized sum.
    """
    count = len(k)
    total = np.sum(k)
    centre = count / 2
    spread = np.sqrt(count / 12)
    return (total - centre) / spread
@jit(nopython=True)
def _numba_multivariate_normal(mean, cov):
    """Draw one sample from a multivariate normal inside numba nopython code.

    The covariance is factored as ``cov = V @ diag(w) @ V.T`` with
    ``np.linalg.eigh``. An approximately standard-normal vector ``z`` is then
    mapped to ``V @ (sqrt(w) * z) + mean``, whose covariance is ``cov``.

    Args:
        mean: array holding the distribution mean; its length sets the
            dimension of the sample.
        cov: square covariance matrix, assumed symmetric (required by
            ``eigh``).

    Returns:
        numpy.ndarray: one sample with the same length as ``mean``.
    """
    n = len(mean)
    eigvals, eigvecs = np.linalg.eigh(cov)
    scaled = np.zeros(n)
    for i in range(n):
        # Round-off can push an eigenvalue of a positive semi-definite matrix
        # slightly below zero; clamp it so the square root cannot yield NaN.
        root = max(eigvals[i], 0.0) ** 0.5
        # NOTE(review): an earlier comment on this call said the best sample
        # count is 20000, but the code has always used 200 -- confirm which
        # value is intended.
        scaled[i] = root * _central_limit_theorem(
            np.random.uniform(0, 1, 200))
    # eigh returns the eigenvectors as the COLUMNS of eigvecs, so the scaled
    # noise must be multiplied by eigvecs itself. Multiplying by its transpose
    # after scaling would give a sample whose covariance is diag(eigvals).
    return eigvecs @ scaled + mean
@jit(nopython=True)
def _sample_items_weights(user_candidate_items, items_means, items_covs):
    """Sample one latent vector for every candidate item.

    Args:
        user_candidate_items: item indices; each one is used to index
            ``items_means`` and ``items_covs``.
        items_means: 2-D array whose row ``item`` is passed as the mean of
            that item's distribution.
        items_covs: indexable by item; entry ``item`` is passed as the
            covariance of that item's distribution.

    Returns:
        numpy.ndarray: array of shape
        ``(len(user_candidate_items), items_means.shape[1])`` whose row ``i``
        is the sample drawn for the ``i``-th candidate.
    """
    num_candidates = len(user_candidate_items)
    latent_dim = items_means.shape[1]
    samples = np.zeros((num_candidates, latent_dim))
    for row, item_id in enumerate(user_candidate_items):
        samples[row] = _numba_multivariate_normal(items_means[item_id],
                                                  items_covs[item_id])
    return samples
class LinearThompsonSampling(LinearICF):
    """Linear Thompson Sampling.

    An adaptation of the original Thompson Sampling to measure the latent
    dimensions by a PMF formulation [1]_.

    References
    ----------
    .. [1] Abeille, Marc, and Alessandro Lazaric. "Linear thompson sampling revisited."
       Artificial Intelligence and Statistics. PMLR, 2017.
    """

    def __init__(self, *args, **kwargs):
        """Forward every argument unchanged to the parent constructor.

        Args:
            args: positional arguments for ``LinearICF.__init__``.
            kwargs: keyword arguments for ``LinearICF.__init__``.
        """
        super().__init__(*args, **kwargs)

    def reset(self, observation):
        """Reset the model with a training dataset.

        Args:
            observation: the training dataset; it is passed to the parent
                ``reset`` and must expose ``num_total_items``.
        """
        train_dataset = observation
        super().reset(train_dataset)
        self.train_dataset = train_dataset
        self.num_total_items = self.train_dataset.num_total_items
        # items_means is not assigned in this class; presumably the parent
        # reset() populates it -- verify against LinearICF.
        self.num_latent_factors = len(self.items_means[0])

    def actions_estimate(self, candidate_actions):
        """Score the candidate items of one user with sampled parameters.

        A user vector is sampled from a normal distribution built from the
        user's ``A`` and ``b`` statistics, one latent vector is sampled per
        candidate item, and each score is their dot product.

        Args:
            candidate_actions: (user id, candidate_items)

        Returns:
            tuple: ``(items_score, info)`` where ``items_score`` holds one
            score per candidate item and ``info`` is a dict with the sampled
            item vectors under ``'qs'`` and the candidates under
            ``'candidate_items'``; ``update`` reads both keys.
        """
        uid = candidate_actions[0]
        candidate_items = candidate_actions[1]
        b = self.bs[uid]
        A = self.As[uid]
        # The inverse feeds both the mean and the covariance; invert once.
        A_inv = np.linalg.inv(A)
        mean = np.dot(A_inv, b)
        cov = A_inv * self.var
        p = np.random.multivariate_normal(mean, cov)
        qs = _sample_items_weights(candidate_items, self.items_means,
                                   self.items_covs)
        items_score = p @ qs.T
        return items_score, {'qs': qs, 'candidate_items': candidate_items}

    def update(self, observation, action, reward, info):
        """Update one user's statistics with the observed reward.

        Args:
            observation: not used by this method.
            action: (user id, item)
            reward (float): reward
            info: the dict returned by ``actions_estimate``; its ``'qs'`` and
                ``'candidate_items'`` entries are read.
        """
        uid = action[0]
        item = action[1]
        A = self.As[uid]
        b = self.bs[uid]
        # Pick the sampled vector that belongs to the chosen item. argmax
        # returns the first matching position, and 0 when nothing matches, so
        # the item is expected to be one of the candidates.
        position = np.argmax(item == info['candidate_items'])
        max_q = info['qs'][position, :]
        # In-place updates: assumes A and b are NumPy arrays so that the
        # state stored in self.As / self.bs changes -- TODO confirm.
        A += np.outer(max_q, max_q)
        b += reward * max_q