'''
Usage:
python training_v1.py
'''
# Create Model 1 architecture and train weights on dataset
# Pull the helper functions used below (data, roc_callback, separate_trainlabels,
# MiniBatchKMeansAutoConv, model_fp, learn_connections_unsup) into the module
# namespace; a plain "import whale_cnn" would leave these names undefined
from whale_cnn import *
from whale_cnn_unsup import *
import numpy
from skimage.transform import resize
from sklearn.metrics import roc_auc_score
from sklearn.cluster import MiniBatchKMeans
from sklearn.feature_extraction import image
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, normalize
from sklearn.metrics.pairwise import pairwise_distances
from keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
from keras import layers
from keras.layers import Input, Lambda, Dense, Reshape, Dropout, Activation, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Concatenate
from keras.models import load_model, Model
from keras.optimizers import Adam
import tensorflow as tf
import keras
import keras.backend as K
K.set_image_data_format('channels_last')
def create_model(num_groups,group_size,kernel_size,input_shape,connections_1f):
''' create_model Method
CNN Model for Right Whale Upcall Recognition with filters learned through K-Means
and energy-correlated receptive fields
Args:
num_groups: int number of groups to split the filters of the first and second
layers into
group_size: int size of the groups created in the first and second layers
kernel_size: int size of the filters in the Conv2D layers (same kernel size
for all three layers)
        input_shape: (Height, Width, Number of Channels) shape of one input sample
                     (Keras adds the samples axis automatically)
connections_1f: 2-D matrix of connections created by "learn_connections_unsup"
comprised of "num_groups" rows and "group_size" columns, for
the first layer
Returns:
Compiled Keras CNN Model
'''
    # There are two stages of learned filters: Layer 0 is the convolution of the raw
    # input with the first set of learned filters (each filter is of depth 1 because
    # the raw input is of depth 1). Layer 1 corresponds to the second set of learned
    # filters (each filter is of depth "group_size", and the learned filters are
    # applied to smaller groups of the entire set of feature maps, each group being
    # of depth "group_size").
X_input = Input(shape=input_shape,name='input')
# Dropout on the visible layer (1 in 5 probability of dropout)
X = Dropout(0.2,name='dropout0')(X_input)
# BatchNorm on axis=3 (on the axis corresponding to the number of filters)
X = BatchNormalization(axis=3,name='bn0')(X)
# Convolution of Filters with Input. Since the filters are determined through unsupervised
# learning, they are not updated through backpropagation (i.e. trainable=False)
X = Conv2D(filters=num_groups*group_size,kernel_size=kernel_size,use_bias=False,activation='relu',name='conv0',trainable=False)(X)
# Maxpooling for translation invariance and to halve height and width dimensions
X_maps1 = MaxPooling2D(name='maxpool0')(X)
# The filters learned via K-Means for the next layer are trained on groups of feature maps
# instead of the entire set. Specifically, the set of feature maps from X_maps1
# are broken up into "num_groups" groups of "group_size" filters, with the groups
# determined via energy correlation. (The reduced dimensionality improves the performance
# of K-Means). These smaller groups of feature maps are then fed to K-Means to generate
# new filters. Note, however, that it is not possible to apply the learned filters of
# reduced dimensionality immediately to the output of X_maps1 of full dimensionality.
# In order to account for this discrepancy, the output of X_maps1 is split
# into the same groups used for K-Means, and all the learned filters applied to each of
    # these smaller groups. For example, suppose X_maps1 originally has 64 feature maps,
    # and these are split into 16 groups of 4 feature maps. After these groups are fed into
    # K-Means and, say, 128 filters are generated, X_maps1 is split into the same
# 16 groups of 4 feature maps. Then, the 128 filters are applied to each of these 16
# groups (since each group is the same size as the groups originally fed to K-Means). The
# results are then concatenated along the axis corresponding to the number of filters
# (the last axis) to represent the output of the convolution.
# The dictionaries below are used to implement this grouping mechanism. Note that the
# connections_1f array contains all the groups of feature maps determined via
# energy-correlation. This array is used to slice the original set of feature maps into
# the desired groups.
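    # Sanity check (illustrative, assuming connections_1f is a NumPy integer array):
    # the connection matrix must supply one row of "group_size" feature-map indices
    # for each of the "num_groups" groups
    assert connections_1f.shape == (num_groups,group_size)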
layers_1 = dict()
# connections_1f is composed of "num_groups" rows and "group_size" columns
# layers_1_lambda, layers_1_reshape, and layers_1_maxpool are dictionaries of
# sub-dictionaries, where each sub-dictionary corresponds to one of the "num_groups" groups
# in connections_1f
layers_1_lambda = dict()
layers_1_reshape = dict()
layers_1_maxpool = dict()
for ii in range(num_groups):
# Instantiate the sub-dictionaries for each main dictionary, for the current group
# under consideration
layers_1[ii] = dict()
layers_1_lambda[ii] = dict()
layers_1_reshape[ii] = dict()
layers_1_maxpool[ii] = dict()
for jj in range(group_size):
# For the current group under consideration (represented by ii) and the current
# member of the group under consideration (represented by jj), use Keras'
# Lambda layer to select that one member of the group (that one feature map)
# from the entire main group. Then use Keras' Reshape layer to reshape the
# dimensions of the output of the Lambda layer to:
# (Number of samples, height, width, 1)
            # Bind ii and jj as default arguments so that each Lambda keeps its own
            # pair of indices instead of closing over the loop variables
            layers_1_lambda[ii]['X_lambda_'+str(ii)+'_'+str(jj)] = Lambda(lambda X,ii=ii,jj=jj: X[:,:,:,connections_1f[ii,jj]],name='lambda1_'+str(ii)+'_'+str(jj))(X_maps1)
layers_1_reshape[ii]['X_reshape_'+str(ii)+'_'+str(jj)] = Reshape((*K.int_shape(layers_1_lambda[ii]['X_lambda_'+str(ii)+'_'+str(jj)])[1:3],1),name='reshape1_'+str(ii)+'_'+str(jj))(layers_1_lambda[ii]['X_lambda_'+str(ii)+'_'+str(jj)])
# After the for loop above, layers_1_reshape[ii] contains all the individual feature
# maps comprising the current group under consideration. Concatenate all of them into
# one group of "group_size" feature maps using keras.layers.concatenate
layers_1_concat = [layer for layer in layers_1_reshape[ii].values()]
layers_1[ii]['X_concat_'+str(ii)] = keras.layers.concatenate(layers_1_concat,name='concat1_'+str(ii))
# Apply BatchNorm along axis=3 as before
layers_1[ii]['X_batchnorm_'+str(ii)] = BatchNormalization(axis=3,name='bn1_'+str(ii))(layers_1[ii]['X_concat_'+str(ii)])
# Apply all the filters learned via K-Means to the current group of feature maps
        # under consideration. (Note that samples from all the groups were fed into
        # K-Means to learn these filters; "group_size*2" filters were learned.) All
        # "group_size*2" filters are applied to the current group under consideration.
layers_1[ii]['X_conv2d_'+str(ii)] = Conv2D(filters=group_size*2,kernel_size=kernel_size,use_bias=False,activation='relu',name='conv1_'+str(ii),trainable=False)(layers_1[ii]['X_batchnorm_'+str(ii)])
        # Maxpooling is applied for translation invariance and to halve the width and
        # height dimensions
layers_1_maxpool[ii]['X_maxpool2d_'+str(ii)] = MaxPooling2D(pool_size=(2,2),name='maxpool1_'+str(ii))(layers_1[ii]['X_conv2d_'+str(ii)])
    # Concatenate the results for all the groups into one set, so that the feature
    # maps from every group can be flattened into a single feature vector below
layers_1_final = []
for ii in range(num_groups):
# The sub-dictionaries in the layers_1_maxpool main dictionary contain all the
# results to be concatenated
layers_1_final.extend([layer for layer in layers_1_maxpool[ii].values()])
X_maps2 = keras.layers.concatenate(layers_1_final,name='final1')
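    # Sanity check (illustrative): after concatenation, the depth of X_maps2 should
    # be the "num_groups" groups times the group_size*2 filters applied to each group
    assert K.int_shape(X_maps2)[-1] == num_groups*group_size*2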
    # Flatten the outputs of the first and second layers
X_maps1_f = Flatten(name='flatten1')(X_maps1)
X_maps2_f = Flatten(name='flatten2')(X_maps2)
# Concatenate the flattened outputs into one feature vector
X_maps = keras.layers.concatenate([X_maps1_f,X_maps2_f],name='final_concat')
# Pass the full feature vector to the first fully connected layer
X = Dense(200,activation='relu',name='dense1')(X_maps)
# Dropout on the fully connected layer (1 in 2 probability of dropout)
X = Dropout(0.5,name='dropout3')(X)
# Pass to the second fully connected layer for binary classification
X_output = Dense(1,activation='sigmoid',name='dense2')(X)
# Use Adam optimizer
opt = Adam(lr=0.0001,beta_1=0.9,beta_2=0.999,decay=0.01)
model = Model(inputs=X_input,outputs=X_output)
model.compile(optimizer=opt,loss='binary_crossentropy',metrics=['accuracy'])
return model
# Use the data() method from whale_cnn.py to generate the training and test datasets
# and labels
X_train, Y_train, X_testV, X_testH, Y_test = data()
# Parameters for Training Model:
# Size of Filters (kernel_size x kernel_size) in Keras model for all convolutional layers
kernel_size = 7
# Number of groups of filters
num_groups_f = 32
# Number of feature maps in each group
group_size_f = 8
# Input shape of one sample: (Height, Width, Number of Channels); Keras adds the
# samples axis automatically
input_shape = (X_train.shape[1],X_train.shape[2],X_train.shape[3])
# Instantiate a placeholder connection matrix of the expected size so that the Keras
# model can be built with the proper architecture. These values will be replaced
# once the connections are learned, and the model re-instantiated with the learned values
connections_1f = numpy.arange(num_groups_f*group_size_f).reshape(num_groups_f,group_size_f)
connections_1f = numpy.asarray(connections_1f,numpy.int32)
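# Sanity check (illustrative): the placeholder assigns each of the
# num_groups_f*group_size_f first-layer feature maps to exactly one group
assert numpy.array_equal(numpy.sort(connections_1f.ravel()),numpy.arange(num_groups_f*group_size_f))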
# Instantiate the Keras model with the proper architecture
model = create_model(num_groups_f,group_size_f,kernel_size,input_shape,connections_1f)
# "k_train" is the set of samples fed to K-Means to learn filters unsupervised
k_train = X_train.transpose((0,3,1,2))
# The "separate_trainlabels" function shuffles the training set (and training labels),
# and yields the indices to replicate the shuffling operation. The function also generates
# "Y_pos" and "Y_neg," arrays containing indices of the samples in the training set belonging
# to the positive and negative class, respectively. ("Y_pos" and "Y_neg" are each set to
# contain 100 samples.) An equal number of samples from each of these arrays is therefore
# given to K-Means, so that K-Means can learn its dictionary of filters from features of
# both the positive and the negative class.
Y_pos,Y_neg,indices = separate_trainlabels(Y_train,num_perclass=100)
# Shuffle k_train, model_train, and model_labels according to the indices specified by
# "separate_trainlabels"
k_train = k_train[indices]
model_train = X_train[indices]
model_labels = Y_train[indices]
# Feed 50 examples from the positive class and 50 examples from the negative class to
# K-Means to learn a dictionary of filters. Identify those examples in k_train using the
# indices in the Y_pos and Y_neg arrays
k_train = numpy.concatenate((k_train[Y_pos[0:50]],k_train[Y_neg[0:50]]),axis=0)
# Use the "MiniBatchKMeansAutoConv" function to learn the dictionary of filters. Extract
# 1/3 of the total number of patches randomly from each sample for training, learn
# num_groups_f*group_size_f filters (centroids), and do not use recursive autoconvolution
centroids_0 = MiniBatchKMeansAutoConv(k_train,(kernel_size,kernel_size),0.33,num_groups_f*group_size_f,[0])
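# Sanity check (illustrative, assuming MiniBatchKMeansAutoConv returns centroids in
# (number of filters, depth, height, width) order, as the transpose below expects)
assert centroids_0.shape == (num_groups_f*group_size_f,1,kernel_size,kernel_size)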
# The learned filters (centroids) will be set as the weights of the "conv0" layer.
layer_toset = model.get_layer('conv0')
# Reshape the learned filters so that they are the same shape as expected by the Keras layer
filters = centroids_0.transpose((2,3,1,0))
# set_weights expects a list with one array per weight tensor (only the kernel here,
# since the layer was created with use_bias=False)
layer_toset.set_weights([filters])
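# Sanity check (illustrative): confirm the kernel now stored in "conv0" matches the
# learned centroids
assert numpy.allclose(model.get_layer('conv0').get_weights()[0],filters)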
# Construct an intermediate model (using model_fp) that generates the output of layer
# "maxpool0." This is the output that the next set of learned filters will be convolved with.
# Therefore, generate the output for further processing (i.e. learning filter connections)
# For sake of efficiency, continue using only the 100 samples in Y_pos and 100 samples in Y_neg
# for the processing below (i.e. forward pass only these 200 samples from the training set
# through the Keras model and obtain the output of layer "maxpool0" for only these samples).
fp_train = numpy.concatenate((model_train[Y_pos],model_train[Y_neg]),axis=0)
output_0 = model_fp(model,fp_train,'maxpool0')
# Transpose the output into shape: (Number of samples, number of filters, height, width)
# for "learn_connections_unsup" and "MiniBatchKMeansAutoConv"
output_0 = output_0.transpose((0,3,1,2))
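# Sanity check (illustrative): output_0 should now hold one set of
# num_groups_f*group_size_f feature maps for each of the forwarded samples
assert output_0.shape[0:2] == (fp_train.shape[0],num_groups_f*group_size_f)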
# For sake of efficiency, use only the first 10 and last 10 examples from output_0 for
# learning filter connections via "learn_connections_unsup"
output_0_learnf = numpy.concatenate((output_0[0:10],output_0[-10:]),axis=0)
# Use the "learn_connections_unsup" function to find "num_groups_f" groups of feature maps
# of size "group_size" that are most strongly correlated (using the "energy correlation"
# pairwise similarity metric). In this case, to calculate the pairwise similarity metrics,
# the "samples" are the feature maps (depth dimension) and the "features" are the elements in
# each feature map (height*width dimension)
connections_1f = learn_connections_unsup(output_0_learnf,num_groups_f,group_size_f,flag=0)
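# Sanity check (illustrative, assuming learn_connections_unsup returns a
# (num_groups_f, group_size_f) index matrix, which is the shape create_model expects)
assert connections_1f.shape == (num_groups_f,group_size_f)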
# The feature maps fed into K-Means to learn the next set of filters are split into groups of
# reduced dimensionality (to boost performance), with the groups determined by the results
# of connections_1f. Achieve this splitting by repeating output_0 "num_groups_f" times, each
# time using only the filters in one of the "num_groups_f" groups of connections_1f.
# "output_0_connected" holds all these versions of output_0 with the different sets of
# filters selected. K-Means will receive samples from each of these different versions of
# output_0 to learn the next dictionary of filters
output_0_connected = numpy.zeros((output_0.shape[0]*num_groups_f,group_size_f,output_0.shape[2],output_0.shape[3]))
index = 0
# For sake of efficiency, do not use all the samples in each version of output_0. Only use
# the first and last 10 samples in each version of output_0. "k_train_indices" keeps track
# of what all these indices are, so that the appropriate samples may be selected for feeding
# into K-Means
k_train_indices = []
for ii in range(num_groups_f):
output_0_connected[index:index+output_0.shape[0],:,:,:] = output_0[:,connections_1f[ii,:],:,:]
temp1 = numpy.arange(index,index+10)
k_train_indices.extend(temp1)
temp2 = numpy.arange((index+output_0.shape[0])-10,(index+output_0.shape[0]))
k_train_indices.extend(temp2)
index = index + output_0.shape[0]
k_train = output_0_connected[k_train_indices]
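# Sanity check (illustrative): 20 samples (the first 10 and last 10) were selected
# from each of the "num_groups_f" versions of output_0
assert k_train.shape[0] == num_groups_f*20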
# Use the "MiniBatchKMeansAutoConv" function to learn the dictionary of filters. Extract
# 1/2 of the total number of patches randomly from each sample for training, learn
# group_size_f*2 filters (centroids), and do not use recursive autoconvolution.
# Since group_size_f*2 filters are learned, all of them are convolved with each of
# the "num_groups_f" groups in the Keras model, and the results are concatenated, giving
# a total of num_groups_f*group_size_f*2 feature maps in the next layer.
centroids_1 = MiniBatchKMeansAutoConv(k_train,(kernel_size,kernel_size),0.5,group_size_f*2,[0])
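# Sanity check (illustrative, same centroid-layout assumption as for centroids_0):
# each of the group_size_f*2 centroids now has depth group_size_f to match the
# grouped inputs it will be convolved with
assert centroids_1.shape == (group_size_f*2,group_size_f,kernel_size,kernel_size)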
# Re-instantiate the model with the newly-learned connections_1f
model = create_model(num_groups_f,group_size_f,kernel_size,input_shape,connections_1f)
# Since the model is re-instantiated, re-set the filters for "conv0"
layer_toset = model.get_layer('conv0')
filters = centroids_0.transpose((2,3,1,0))
layer_toset.set_weights([filters])
# Since the new set of learned filters is convolved with each of the smaller groups of
# feature maps of reduced dimensionality, each learned filter is the same shape (height, width,
# depth) as each of these smaller groups. Therefore, there is one "conv" layer for each of
# these smaller groups in the Keras model that will be set to the newly-learned set of filters.
# Set those filters using the for loop below
for ii in range(num_groups_f):
layer_toset = model.get_layer('conv1_'+str(ii))
    filters = centroids_1.transpose((2,3,1,0))
    layer_toset.set_weights([filters])
# Now that the model is correctly instantiated with all learned filters, initiate training
# Use the ModelCheckpoint callback to save model weights only when there is an improvement in
# "roc_auc_val" score. Save the weights to "model_filepath."
model_filepath = 'weights.best.hdf5'
checkpoint = ModelCheckpoint(model_filepath,monitor='roc_auc_val',verbose=1,save_best_only=True,mode='max')
# Stop training early if the "roc_auc_val" score does not improve for 5 epochs using the
# EarlyStopping callback
early_stopping = EarlyStopping(monitor='roc_auc_val',patience=5,mode='max')
# Use the roc_callback class for training. Every epoch, calculate the "roc_auc_val"
# score on the "X_testV" dataset using "Y_test" as labels. This score is representative
# of the "roc_auc_val" score on the test set (actual test "roc_auc_val" score
# calculated after fitting)
validation_train = X_testV
validation_labels = Y_test
callbacks_list = [roc_callback(validation_data=(validation_train,validation_labels)),checkpoint,early_stopping]
# Fit the model using the callbacks list for 500 epochs and a batch size of 100
model.fit(model_train,model_labels,callbacks=callbacks_list,epochs=500,batch_size=100)
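# Optional (not in the original flow): restore the best checkpointed weights before
# evaluating on the test set, rather than using the final-epoch weights
# model.load_weights(model_filepath)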
# To calculate the test "roc_auc_val" score:
# Generate predicted labels for the vertically-enhanced test feature matrix and
# predicted labels for the horizontally-enhanced test feature matrix.
# The final predicted label is the union of these two predicted labels.
# For example, if the vertically-enhanced image predicts label 0, but the
# horizontally-enhanced version of the same image predicts label 1, the label is
# determined to be label 1. If both sets predict label 0, the label is 0. If both sets
# predict 1, the label is 1. The union operation is implemented by adding the two
# predicted probability vectors and capping values greater than 1 at 1
Y_predV = model.predict(X_testV)
Y_predH = model.predict(X_testH)
Y_pred = Y_predV + Y_predH
Y_pred[Y_pred>1] = 1
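# Note: Y_predV and Y_predH are sigmoid probabilities in [0,1], so their sum acts as
# a soft union; capping at 1 keeps the combined score a valid probability for
# roc_auc_score below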
score = roc_auc_score(Y_test,Y_pred)
print('Test ROC_AUC Score = ' + str(score))