tensorflow · laxmareddyp · Feb 29, 2024
@@ -135,6 +135,7 @@ def __init__(
       kernel_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
       bias_regularizer: Optional[tf_keras.regularizers.Regularizer] = None,
       bn_trainable: bool = True,
+      use_first_projection: bool = True,
       **kwargs):
     """Initializes a ResNet model.
 
@@ -164,6 +165,8 @@ def __init__(
         Default to None.
       bn_trainable: A `bool` that indicates whether batch norm layers should be
         trainable. Default to True.
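+      use_first_projection: A `bool` that indicates whether the first block
+        group of a residual-block (small) ResNet uses a projection shortcut.
+        Bottleneck ResNets and all later block groups always use one.
+        Default to True. See
+        https://github.com/tensorflow/models/issues/10583.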
       **kwargs: Additional keyword arguments to be passed.
     """
     self._model_id = model_id
@@ -184,6 +187,7 @@ def __init__(
     self._kernel_regularizer = kernel_regularizer
     self._bias_regularizer = bias_regularizer
     self._bn_trainable = bn_trainable
+    self._use_first_projection = use_first_projection
 
     if tf_keras.backend.image_data_format() == 'channels_last':
       self._bn_axis = -1
@@ -202,12 +206,18 @@ def __init__(
         block_fn = nn_blocks.BottleneckBlock
       else:
         raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
+      use_first_projection = (
+        spec[0] == 'bottleneck'
+        or i > 0
+        or self._use_first_projection
+      )
       x = self._block_group(
           inputs=x,
           filters=int(spec[1] * self._depth_multiplier),
           strides=(1 if i == 0 else 2),
           block_fn=block_fn,
           block_repeats=spec[2],
+          use_first_projection=use_first_projection,
           stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
               self._init_stochastic_depth_rate, i + 2, 5),
           name='block_group_l{}'.format(i + 2))
@@ -326,6 +336,7 @@ def _block_group(self,
                    strides: int,
                    block_fn: Callable[..., tf_keras.layers.Layer],
                    block_repeats: int = 1,
+                   use_first_projection: bool = True,
                    stochastic_depth_drop_rate: float = 0.0,
                    name: str = 'block_group'):
     """Creates one group of blocks for the ResNet model.
@@ -339,6 +350,8 @@ def _block_group(self,
       block_fn: The type of block group. Either `nn_blocks.ResidualBlock` or
         `nn_blocks.BottleneckBlock`.
       block_repeats: An `int` number of blocks contained in the layer.
+      use_first_projection: A `bool` that indicates whether the first block of
+        this group uses a projection shortcut. Default to True.
       stochastic_depth_drop_rate: A `float` of drop rate of the current block
         group.
       name: A `str` name for the block.
@@ -349,7 +362,7 @@ def _block_group(self,
     x = block_fn(
         filters=filters,
         strides=strides,
-        use_projection=True,
+        use_projection=use_first_projection,
         stochastic_depth_drop_rate=stochastic_depth_drop_rate,
         se_ratio=self._se_ratio,
         resnetd_shortcut=self._resnetd_shortcut,
@@ -400,7 +413,8 @@ def get_config(self):
         'kernel_initializer': self._kernel_initializer,
         'kernel_regularizer': self._kernel_regularizer,
         'bias_regularizer': self._bias_regularizer,
-        'bn_trainable': self._bn_trainable
+        'bn_trainable': self._bn_trainable,
+        'use_first_projection': self._use_first_projection
     }
     return config_dict
 
@@ -441,4 +455,5 @@ def build_resnet(
       norm_momentum=norm_activation_config.norm_momentum,
       norm_epsilon=norm_activation_config.norm_epsilon,
       kernel_regularizer=l2_regularizer,
-      bn_trainable=backbone_cfg.bn_trainable)
+      bn_trainable=backbone_cfg.bn_trainable,
+      use_first_projection=backbone_cfg.use_first_projection)
@@ -67,6 +67,38 @@ def test_network_creation(self, input_size, model_id,
     self.assertAllEqual(
         [1, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale],
         endpoints['5'].shape.as_list())
+
+  @parameterized.parameters(
+      (128, 18, 1),
+      (128, 34, 1),
+  )
+  def test_network_creation_no_first_shortcut(self, input_size, model_id,
+                                              endpoint_filter_scale):
+    """Tests small ResNets built with `use_first_projection=False`."""
+    # Expected `count_params()` values when the first projection shortcut is
+    # disabled, keyed by model id.
+    resnet_params = {
+        18: 11186112,
+        34: 21301696,
+    }
+    tf.keras.backend.set_image_data_format('channels_last')
+
+    network = resnet.ResNet(model_id=model_id, use_first_projection=False)
+    self.assertEqual(network.count_params(), resnet_params[model_id])
+
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+
+    # Endpoints '2'..'5' halve the spatial size and double the channel count at
+    # each level; disabling the first shortcut must not change these shapes.
+    self.assertAllEqual(
+        [1, input_size / 2**2, input_size / 2**2, 64 * endpoint_filter_scale],
+        endpoints['2'].shape.as_list())
+    self.assertAllEqual(
+        [1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale],
+        endpoints['3'].shape.as_list())
+    self.assertAllEqual(
+        [1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale],
+        endpoints['4'].shape.as_list())
+    self.assertAllEqual(
+        [1, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale],
+        endpoints['5'].shape.as_list())
 
   @combinations.generate(
       combinations.combine(
@@ -137,7 +169,8 @@ def test_serialize_deserialize(self):
         kernel_initializer='VarianceScaling',
         kernel_regularizer=None,
         bias_regularizer=None,
-        bn_trainable=True)
+        bn_trainable=True,
+        use_first_projection=True)
     network = resnet.ResNet(**kwargs)
 
     expected_config = dict(kwargs)