/
shufflenet.py
124 lines (88 loc) · 3.48 KB
/
shufflenet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from keras.models import Model
from keras.layers import *
from keras.activations import *
from keras.callbacks import *
import keras.backend as K
def _stage(tensor, nb_groups, in_channels, out_channels, repeat):
    """One ShuffleNet stage: a strided (downsampling) unit followed by
    `repeat` stride-1 units at the same channel width."""
    out = _shufflenet_unit(tensor, nb_groups, in_channels, out_channels, 2)
    for _ in range(repeat):
        out = _shufflenet_unit(out, nb_groups, out_channels, out_channels, 1)
    return out
def _pw_group(tensor, nb_groups, in_channels, out_channels):
    """Pointwise (1x1) grouped convolution.

    Splits the input's channels into `nb_groups` equal slices, applies an
    independent 1x1 conv to each slice, and concatenates the results along
    the channel axis.
    """
    nb_chan_per_grp = in_channels // nb_groups
    # Per-group output width, rounded to nearest int. Hoisted out of the
    # loop (it is loop-invariant). NOTE(review): if out_channels is not
    # divisible by nb_groups the concatenated width is
    # nb_groups * grp_out_chan, which may differ from out_channels.
    grp_out_chan = int(out_channels / nb_groups + 0.5)

    pw_convs = []
    for grp in range(nb_groups):
        # Bind the slice bounds as default arguments: a plain closure
        # late-binds `grp`, so if Keras re-evaluates the Lambda after the
        # loop (e.g. on model deserialization / graph rebuild) every
        # branch would slice the LAST group's channels.
        x = Lambda(
            lambda t,
                   lo=nb_chan_per_grp * grp,
                   hi=nb_chan_per_grp * (grp + 1): t[:, :, :, lo:hi]
        )(tensor)
        pw_convs.append(
            Conv2D(grp_out_chan,
                   kernel_size=(1, 1),
                   padding='same',
                   use_bias=False,
                   strides=1)(x)
        )
    return Concatenate(axis=-1)(pw_convs)
def _shuffle(x, nb_groups):
    """Channel shuffle: interleave channels across the convolution groups
    so information flows between groups in the next grouped conv."""
    def _interleave(t):
        _, height, width, channels = K.int_shape(t)
        per_group = channels // nb_groups
        # (b, h, w, c) -> (b, h, w, per_group, groups)
        t = K.reshape(t, (-1, height, width, per_group, nb_groups))
        # Swap only the group and per-group axes.
        t = K.permute_dimensions(t, (0, 1, 2, 4, 3))
        # Flatten back to (b, h, w, c).
        return K.reshape(t, (-1, height, width, channels))

    return Lambda(_interleave)(x)
def _shufflenet_unit(tensor, nb_groups, in_channels, out_channels, strides,
                     shuffle=True, bottleneck=4):
    """ShuffleNet unit: GConv1x1 -> (shuffle) -> DWConv3x3 -> GConv1x1,
    joined with an identity shortcut (stride 1) or an avg-pooled shortcut
    concatenated on channels (stride 2)."""
    bottleneck_channels = out_channels // bottleneck

    # Compress to the bottleneck width with a pointwise grouped conv.
    out = _pw_group(tensor, nb_groups, in_channels, bottleneck_channels)
    out = BatchNormalization()(out)
    out = Activation('relu')(out)

    if shuffle:
        out = _shuffle(out, nb_groups)

    # 3x3 depthwise conv; note there is no ReLU between it and its BN.
    out = DepthwiseConv2D(kernel_size=(3, 3),
                          padding='same',
                          use_bias=False,
                          strides=strides)(out)
    out = BatchNormalization()(out)

    # Expand back out. When striding, leave room for the in_channels that
    # the pooled shortcut contributes to the concatenation.
    expand_channels = out_channels if strides < 2 else out_channels - in_channels
    out = _pw_group(out, nb_groups, bottleneck_channels, expand_channels)
    out = BatchNormalization()(out)

    if strides < 2:
        # Identity shortcut (assumes in_channels == out_channels here).
        out = Add()([tensor, out])
    else:
        # Downsampled shortcut, joined on the channel axis.
        shortcut = AveragePooling2D(pool_size=(3, 3),
                                    strides=2,
                                    padding='same')(tensor)
        out = Concatenate(axis=-1)([shortcut, out])

    return Activation('relu')(out)
def _info(nb_groups):
return {
1: [24, 144, 288, 576],
2: [24, 200, 400, 800],
3: [24, 240, 480, 960],
4: [24, 272, 544, 1088],
8: [24, 384, 768, 1536]
}[nb_groups], [None, 3, 7, 3]
def ShuffleNet(input_shape, nb_classes, include_top=True, weights=None, nb_groups=8):
    """Build a ShuffleNet classifier.

    Args:
        input_shape: input tensor shape, e.g. (224, 224, 3).
        nb_classes: number of softmax output classes.
        include_top: if True, append global average pooling + Dense head.
        weights: optional path to a weights file, loaded by layer name.
        nb_groups: group count for the grouped convolutions (key of _info).

    Returns:
        A compiled-ready keras Model.
    """
    inputs = Input(shape=input_shape)

    # Stem: strided 3x3 conv then max pool (4x spatial downsampling).
    features = Conv2D(24,
                      kernel_size=(3, 3),
                      strides=2,
                      use_bias=False,
                      padding='same')(inputs)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = MaxPooling2D(pool_size=(3, 3),
                            strides=2,
                            padding='same')(features)

    # Stack the ShuffleNet stages defined by the (channels, repeats) table.
    channels_list, repeat_list = _info(nb_groups)
    for stage_idx in range(1, len(channels_list)):
        features = _stage(features,
                          nb_groups,
                          channels_list[stage_idx - 1],
                          channels_list[stage_idx],
                          repeat_list[stage_idx])

    if include_top:
        features = GlobalAveragePooling2D()(features)
        features = Dense(nb_classes, activation='softmax')(features)

    model = Model(inputs=inputs, outputs=features)
    if weights is not None:
        model.load_weights(weights, by_name=True)
    return model