/
model.py
203 lines (169 loc) · 8.27 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import tensorflow as tf
import numpy as np
from datetime import datetime
from network_utils import Weight, Bias
tfd = tf.contrib.distributions
tfb = tfd.bijectors
tf_dtype = tf.float32
np_dtype = np.float32
class Model():
def __init__(self, obs, features, priors, sigma, dt, T, p, pars, network_pars):
weights = {}
# Initialise RNN layers
with tf.variable_scope('input_layer'):
weights['w0'] = Weight(
[1, network_pars['n_inputs'], network_pars['hidden_layer_width']], tf_dtype).tile(p)
weights['b0'] = Bias([1, 1, network_pars['hidden_layer_width']], tf_dtype).tile(p)
for i in range(1, network_pars['n_hidden_layers'] + 1):
with tf.variable_scope('hidden_layer_%d' % i):
weights['w%i' % i] = Weight(
[1, network_pars['hidden_layer_width'], network_pars['hidden_layer_width']], tf_dtype).tile(p)
weights['b%i' % i] = Bias(
[1, 1, network_pars['hidden_layer_width']], tf_dtype).tile(p)
with tf.variable_scope('output_layer'):
weights['wn'] = Weight(
[1, network_pars['hidden_layer_width'], network_pars['n_outputs']], tf_dtype).tile(p)
weights['bn'] = Bias([1, 1, network_pars['n_outputs']], tf_dtype).tile(p)
self.sigma = sigma
self.weights = weights
self.n = obs.shape[0]
self.network_pars = network_pars
self.obs = obs
self.pars = pars
self.priors = priors
self.features = features
self.p = p
self.dt = dt
self.T = dt
# Build the TensorFlow graph
self._build()
def _build(self):
'''
Build model graph
'''
print("Building graph...")
# Build diffusion bridge functions using RNN for all p simulations at once
# Modified original implementation to accommodate N coupled equations (2N in total)
# Should be functional
with tf.name_scope('diffusion_bridge'):
v_paths, v_mu, v_sigma = self._diff_bridge()
# ELBO loss not implemented yet for new model specification
# with tf.name_scope('ELBO'):
# loss_estimate = ELBO(
# self.obs, v_paths, v_mu, v_sigma, self.pars, self.priors, self.p, self.dt, self.T)
# tf.summary.scalar('mean_loss', mean_loss)
# Optimiser should be functional
# specifying optimizer and gradient clipping for backprop
with tf.name_scope('optimize'):
opt = tf.train.AdamOptimizer(1e-3)
gradients, variables = zip(
*opt.compute_gradients(mean_loss))
global_norm = tf.global_norm(gradients)
gradients, _ = tf.clip_by_global_norm(gradients, 4e3)
self.train_step = opt.apply_gradients(
zip(gradients, variables))
tf.summary.scalar('global_grad_norm', global_norm)
# To implement: report variables to tensorboard
# with tf.name_scope('variables'):
# with tf.name_scope('theta1'):
# tf.summary.scalar('theta1_mean', c1_mean)
# tf.summary.scalar('theta1_std', c1_std)
# with tf.name_scope('theta2'):
# tf.summary.scalar('theta2_mean', c2_mean)
# tf.summary.scalar('theta2_std', c2_std)
# with tf.name_scope('theta3'):
# tf.summary.scalar('theta3_mean', c3_mean)
# tf.summary.scalar('theta3_std', c3_std)
# self.merged = tf.summary.merge_all()
def _diff_bridge(self):
'''
Roll out rnn cell across the time series
'''
x01 = tf.expand_dims(self.params['x0'].sample(self.p), axis = 2)
zeros = tf.zeros_like(x01)
x0 = tf.concat((x01, zeros), axis = 2)
t1 = tf.expand_dims(self.features['next_times'][...,0], axis = 2)
y1 = tf.expand_dims(self.features['next_obs'][...,0], axis = 2)
input_features = tf.concat([x0, zeros, t1, y1, y1 - x0], axis = 2)
v_mu, v_sigma = self._rnn_cell(input_features)
v_mu_store = v_mu
v_sigma_store = tf.reshape(v_sigma, [self.p, self.n, 1, 4])
v_path = self._path_sampler(tf.expand_dims(x0, axis = 3), v_mu, v_sigma)
v_path_store = tf.concat((x0, v_path), axis = 3)
for i in range(1, int(self.T / self.dt)):
xt = tf.squeeze(v_path)
y_next = tf.expand_dims(self.features['next_obs'][...,i], axis = 2)
t = self.dt + input_features[...,2]
t_next = tf.expand_dims(self.features['next_times'][...,i], axis = 2)
y_curr_exp = tf.expand_dims(tf.reduce_sum(xt, axis = 2), axis = 2)
input_features = tf.concat([xt, t, t_next, y_next, y_next - y_curr_exp], axis = 2)
v_mu, v_sigma = self._rnn_cell(input_features)
v_mu_store = tf.concat((v_mu_store, v_mu), axis = 3)
v_sigma_store = tf.concat((v_sigma_store, tf.reshape(v_sigma, [self.p, self.n, 1, 4])), axis = 2)
v_path = self._path_sampler(tf.expand_dims(xt, axis = 3), v_mu, v_sigma)
v_path_store = tf.concat((v_path_store, v_path), axis = 3)
return v_path_store, v_mu_store, v_sigma_store
# the rnn cell called by diff_bridge
def _rnn_cell(self, features_input, eps_identity=1e-3):
'''
RNN cell for supplying Gaussian state transitions
:param features_input: input features for \tilde\alpha and \tilde\beta
:param eps: eps * identity added to diffusion matrix to control numerical stability
'''
hidden_layer = tf.nn.relu(
tf.add(tf.matmul(features_input, self.weights['w0']), self.weights['b0']))
for i in range(1, self.network_pars['n_hidden_layers'] + 1):
hidden_layer = tf.nn.relu(
tf.add(tf.matmul(hidden_layer, self.weights['w%i' % i]), self.weights['b%i' % i]))
output = tf.add(
tf.matmul(hidden_layer, self.weights['wn']), self.weights['bn'])
v_mu, v_sigma_11, v_sigma_21, v_sigma_22 = tf.split(output, [2, 1, 1, 1], 2)
zeros = tf.zeros((self.p,self.n,1,1), dtype = tf_dtype)
v_sigma_chol = tf.concat((tf.concat((tf.expand_dims(tf.nn.softplus(v_sigma_11), axis = 3), zeros), axis = 3),
tf.concat((tf.expand_dims(v_sigma_21, axis = 3),
tf.expand_dims(tf.nn.softplus(v_sigma_22), axis = 3)), axis = 3)), axis = 2)
v_sigma_chol = tf.cholesky(tf.matmul(v_sigma_chol, v_sigma_chol, transpose_b = True) +
eps_identity * tf.tile(tf.reshape(tf.eye(2), [1,1,2,2]),[self.p, self.n, 1, 1]))
return tf.reshape(v_mu, [self.p, self.n, 2, 1]), v_sigma_chol
# functions to return p simulations of a diffusion bridge
def _path_sampler(self, xt, v_mu, v_sigma):
'''
Sample new state using learned Gaussian state transitions
:param xt: the current state
:param v_mu: variational drift vector from RNN
:param v_sigma: variational diffusion matrix from RNN as cholesky factor
'''
xtplus_dist = tfd.TransformedDistribution(distribution=tfd.MultivariateNormalTriL(
loc = xt + self.dt * v_mu, scale_tril = tf.sqrt(self.dt) * v_sigma),
bijector=tfb.Softplus(event_ndims = 2))
xtplus = tf.reshape(xtplus_dist.sample(), [self.p, self.n, 2, 1])
return xtplus
# train the model
def train(self, niter, path):
'''
trains model
:pars niter: number of iterations
:pars path: path to tensorboard output
'''
print("Training model...")
writer = tf.summary.FileWriter(
'%s/%s' % (path, datetime.now().strftime("%d:%m:%y-%H:%M:%S")), sess.graph)
for i in range(niter):
self.train_step.run()
if i % 10 == 0:
summary = sess.run(self.merged)
writer.add_summary(summary, i)
def save(self, path):
'''
save model
'''
saver = tf.train.Saver()
saver.save(sess, path)
print("Model Saved")
def load(self, path):
'''
load model
'''
saver = tf.train.Saver()
saver.restore(sess, path)
print("Model Restored")