# Import libraries and relevant dependencies
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import string
import argparse
# Dyck library
from tasks.dyck_generator import DyckLanguage
# Stack-augmented architectures (Stack-RNN + Stack-LSTM)
from models.rnn_models import VanillaRNN, SRNN_Softmax, SRNN_Softmax_Temperature, SRNN_GumbelSoftmax
from models.lstm_models import VanillaLSTM, SLSTM_Softmax, SLSTM_Softmax_Temperature, SLSTM_GumbelSoftmax
# Baby Neural Turing Machine (Baby-NTM)
from models.ntm_models import BNTM_Softmax, BNTM_SoftmaxTemperature, BNTM_GumbelSoftmax
# Set default tensor type "double"
torch.set_default_tensor_type('torch.DoubleTensor')
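# (Note: torch.set_default_tensor_type is deprecated in newer PyTorch releases;
#  torch.set_default_dtype(torch.float64) is the modern equivalent for this use.)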
# GPU/CPU Check
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print (device)
# Output threshold: sigmoid outputs >= epsilon are binarized to 1 when scoring predictions
epsilon = 0.5
randomseed_num = 1729
print ('RANDOM SEED: {}'.format(randomseed_num))
random.seed (randomseed_num)
np.random.seed (randomseed_num)
torch.manual_seed(randomseed_num)
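# Note: on recent PyTorch versions, torch.manual_seed also seeds all CUDA devices;
# full run-to-run determinism on GPU may additionally require deterministic cuDNN settings.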
def get_args ():
    parser = argparse.ArgumentParser(description='parser')
    # Dyck language parameters
    parser.add_argument('--num_par', default=2, type=int, help='number of parentheses pairs')
    parser.add_argument('--p_val', default=0.5, type=float, help='p value for the PCFG for Dyck-n')
    parser.add_argument('--q_val', default=0.25, type=float, help='q value for the PCFG for Dyck-n')
    # Training and test corpora parameters
    # (nargs=2 so the window can also be set from the command line)
    parser.add_argument('--training_window', default=[2, 50], nargs=2, type=int, help='training corpus window (min and max lengths)')
    parser.add_argument('--training_size', default=5000, type=int, help='number of training samples')
    parser.add_argument('--test_size', default=1000, type=int, help='number of test samples')
    # Model choice
    parser.add_argument('--model_name', type=str, default='baby_ntm_softmax',
                        choices=['vanilla_rnn', 'vanilla_lstm', 'joulin_mikolov_stack_rnn',
                                 'stack_rnn_softmax', 'stack_rnn_softmax_temp', 'stack_rnn_gumbel_softmax',
                                 'stack_lstm_softmax', 'stack_lstm_softmax_temp', 'stack_lstm_gumbel_softmax',
                                 'baby_ntm_softmax', 'baby_ntm_softmax_temp', 'baby_ntm_gumbel_softmax'],
                        help='model choice')
    parser.add_argument('--n_layers', default=1, type=int, help='number of layers')
    parser.add_argument('--n_hidden', default=8, type=int, help='hidden dimension')
    # Stack/memory size
    parser.add_argument('--memory_size', default=104, type=int, help='memory/stack size')
    parser.add_argument('--memory_dim', default=5, type=int, help='memory/stack dimension')
    # Temperature + annealing rate
    parser.add_argument('--temp_min', default=0.5, type=float, help='minimum temperature value')
    parser.add_argument('--anneal_rate', default=0.0001, type=float, help='anneal rate')
    # Learning rate and number of epochs
    parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
    parser.add_argument('--n_epoch', default=3, type=int, help='number of epochs')
    # Save/load model
    parser.add_argument('--save_path', default='', type=str, help='where to save the model weights')
    parser.add_argument('--load_path', default='', type=str, help='path of the saved model weights')
    # Parse known arguments
    args, _ = parser.parse_known_args ()
    return args
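# Example invocations (illustrative only; these flag settings are arbitrary choices,
# not the configurations used for any reported results):
#   python main.py --model_name stack_rnn_softmax --n_hidden 8 --n_epoch 3
#   python main.py --model_name baby_ntm_gumbel_softmax --temp_min 0.5 --anneal_rate 0.0001
#   python main.py --model_name vanilla_lstm --load_path models/vanilla_lstm_model_weights.pth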
def choose_model (model_name, n_hidden, vocab_size, n_layers, memory_size, memory_dim):
    if model_name == 'vanilla_rnn':
        model = VanillaRNN (n_hidden, vocab_size, vocab_size, n_layers)
    elif model_name == 'vanilla_lstm':
        model = VanillaLSTM (n_hidden, vocab_size, vocab_size, n_layers)
    elif model_name == 'stack_rnn_softmax':
        model = SRNN_Softmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'stack_lstm_softmax':
        model = SLSTM_Softmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'baby_ntm_softmax':
        model = BNTM_Softmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'stack_rnn_softmax_temp':
        model = SRNN_Softmax_Temperature (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'stack_lstm_softmax_temp':
        model = SLSTM_Softmax_Temperature (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'baby_ntm_softmax_temp':
        model = BNTM_SoftmaxTemperature (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'stack_rnn_gumbel_softmax':
        model = SRNN_GumbelSoftmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'stack_lstm_gumbel_softmax':
        model = SLSTM_GumbelSoftmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'baby_ntm_gumbel_softmax':
        model = BNTM_GumbelSoftmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    elif model_name == 'joulin_mikolov_stack_rnn':
        # The Joulin-Mikolov-style stack RNN is served by the same SRNN_Softmax class here
        model = SRNN_Softmax (n_hidden, vocab_size, vocab_size, n_layers, memory_size, memory_dim)
    else:
        raise ValueError ('Unrecognized model name: {}'.format(model_name))
    return model.to(device)
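# Note: all model classes above share the constructor signature
# (n_hidden, input_size, output_size, n_layers[, memory_size, memory_dim]),
# so the elif chain could equally be a dict-based dispatch; the explicit
# chain is kept for readability.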
def main ():
    args = get_args()
    # Parameters of the probabilistic Dyck language
    NUM_PAR = args.num_par
    MIN_SIZE = args.training_window[0]
    MAX_SIZE = args.training_window[1]
    P_VAL = args.p_val
    Q_VAL = args.q_val
    # Number of samples in the training corpus
    TRAINING_SIZE = args.training_size
    # Number of samples in the test corpus
    TEST_SIZE = args.test_size
    # Create a Dyck language generator
    Dyck = DyckLanguage (NUM_PAR, P_VAL, Q_VAL)
    all_letters = word_set = Dyck.return_vocab ()
    n_letters = vocab_size = len (word_set)
    print('Loading data...')
    # The test set contains strictly longer sequences than the training window,
    # probing generalization beyond the training distribution
    training_input, training_output, st = Dyck.training_set_generator (TRAINING_SIZE, MIN_SIZE, MAX_SIZE)
    test_input, test_output, st2 = Dyck.training_set_generator (TEST_SIZE, MAX_SIZE + 2, 2 * MAX_SIZE)
    # Print the first sample to sanity-check the data format
    print (training_input[0])
    print (training_output[0])
    print (Dyck.lineToTensor(training_input[0]))
    print (Dyck.lineToTensorSigmoid(training_output[0]))
    print (test_input[0])
    print (test_output[0])
    # Number of hidden units
    n_hidden = args.n_hidden
    # Number of hidden layers
    n_layers = args.n_layers
    # Memory size and dimension
    memory_size = args.memory_size
    memory_dim = args.memory_dim
    # Parameters for the temperature-based methods
    temp = 1.0
    temp_min = args.temp_min
    ANNEAL_RATE = args.anneal_rate
    # Instantiate the chosen model
    model = choose_model (args.model_name, n_hidden, vocab_size, n_layers, memory_size, memory_dim)
    # Learning rate
    learning_rate = args.lr
    # Mean Squared Error (MSE) loss
    criterion = nn.MSELoss()
    # Adam optimizer (https://arxiv.org/abs/1412.6980)
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    print ('Model details:')
    print (model)
    if args.load_path == '':
        # Number of epochs to train our model
        epoch_num = args.n_epoch
        # Arrays for loss and "moving" accuracy per epoch
        loss_arr = []
        correct_arr = []
        for epoch in range(1, epoch_num + 1):
            print ('Epoch: {}'.format(epoch))
            # Total loss per epoch
            total_loss = 0
            # Total number of "correctly" predicted samples so far in the epoch
            counter = 0
            for i in range (TRAINING_SIZE):
                len_input = len (training_input[i])
                # Good old zero grad
                model.zero_grad ()
                # Initialize the hidden state
                hidden = model.init_hidden()
                # Initialize the memory
                memory = torch.zeros (memory_size, memory_dim).to(device)
                # Target values
                target = Dyck.lineToTensorSigmoid(training_output[i]).to(device)
                # Output values
                output_vals = torch.zeros (target.shape).to(device)
                # Temperature update: temp <- max(temp * exp(-ANNEAL_RATE), temp_min),
                # applied once per sample (a no-op for models that ignore `temp`)
                temp = np.maximum(temp * np.exp(-ANNEAL_RATE), temp_min)
                for j in range (len_input):
                    output, hidden, memory = model (Dyck.lineToTensor(training_input[i][j]).to(device), hidden, memory, temp)
                    output_vals [j] = output
                # MSE (y, y_bar)
                loss = criterion (output_vals, target)
                # Add the current loss to the total loss
                total_loss += loss.item()
                # Backprop!
                loss.backward ()
                optimizer.step ()
                # Print the performance of the model every 500 steps
                if i % 500 == 0:
                    print ('Sample Number {}: '.format(i))
                    print ('Input : {}'.format(training_input[i]))
                    print ('Output: {}'.format(training_output[i]))
                    print ('* Counter: {}'.format(counter))
                    print ('* Avg Loss: {}'.format(total_loss/(i+1)))
                # Binarize the entries based on the output threshold
                out_np = np.int_(output_vals.detach().cpu().numpy() >= epsilon)
                target_np = np.int_(target.detach().cpu().numpy())
                # "Moving" training accuracy
                if np.all(np.equal(out_np, target_np)) and (out_np.flatten() == target_np.flatten()).all():
                    counter += 1
                # At the end of the epoch, record our total loss and "moving" accuracy
                if i == TRAINING_SIZE - 1:
                    print ('* Moving Accuracy: {}%'.format(float(counter)/TRAINING_SIZE * 100))
                    loss_arr.append (total_loss)
                    correct_arr.append (counter)
        print ('Moving Accuracy %: ', correct_arr)
        print ('Loss: ', loss_arr)
        save_path = args.save_path
        if save_path == '':
            save_path = 'models/{}_model_weights.pth'.format(args.model_name)
        # Save the model weights
        torch.save(model.state_dict(), save_path)
    else:
        # Load the model weights
        model.load_state_dict(torch.load(args.load_path))
    # Training set accuracy
    train_acc, _ = test_model (model, Dyck, training_input, training_output, temp)
    print ('Training accuracy: {}%.'.format(train_acc))
    # Test set accuracy
    test_acc, _ = test_model (model, Dyck, test_input, test_output, temp)
    print ('Test accuracy: {}%.'.format(test_acc))
def test_model (model, Dyck, data_input, data_output, temp):
    # Turn on the eval mode
    model.eval()
    # Total number of "correctly" predicted samples
    correct_num = 0
    with torch.no_grad():
        for i in range (len(data_output)):
            len_input = len (data_input[i])
            # Initialize the hidden state
            hidden = model.init_hidden()
            # Initialize the memory
            memory = torch.zeros (model.memory_size, model.memory_dim).to(device)
            # Target values
            target = Dyck.lineToTensorSigmoid(data_output[i]).to(device)
            # Output values
            output_vals = torch.zeros (target.shape).to(device)
            for j in range (len_input):
                output, hidden, memory = model (Dyck.lineToTensor(data_input[i][j]).to(device), hidden, memory, temp)
                output_vals [j] = output
            # Binarize the entries based on the output threshold
            out_np = np.int_(output_vals.detach().cpu().numpy() >= epsilon)
            target_np = np.int_(target.detach().cpu().numpy())
            # (Double-)check whether the output values and the target values are the same
            if np.all(np.equal(out_np, target_np)) and (out_np.flatten() == target_np.flatten()).all():
                # If so, increase `correct_num` by one
                correct_num += 1
    return float(correct_num)/len(data_output) * 100, correct_num
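# Note: test_model returns a pair (accuracy percentage, raw correct count);
# the callers in main() above unpack it accordingly.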
if __name__ == "__main__":
    main()