Commit c57e975a authored by saberkun

Merge pull request #10338 from srihari-humbarwadi:readme

PiperOrigin-RevId: 413033276
parents 7fb4f3cd acf4156e
......@@ -32,9 +32,7 @@ def _pad_strides(strides: int, axis: int) -> Tuple[int, int, int, int]:
return (1, strides, strides, 1)
def _maybe_downsample(x: tf.Tensor, out_filter: int, strides: int,
axis: int) -> tf.Tensor:
"""Downsamples feature map and 0-pads tensor if in_filter != out_filter."""
data_format = 'NCHW' if axis == 1 else 'NHWC'
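Only the reflowed signature of `_maybe_downsample` appears in this hunk. For orientation, a minimal sketch of the RevNet-style behavior its docstring describes, assuming average pooling for the spatial downsample and zero-padding on the channel axis (the name and body here are illustrative, not the file's exact implementation):

```python
import tensorflow as tf

def _maybe_downsample_sketch(x: tf.Tensor, out_filter: int, strides: int,
                             axis: int) -> tf.Tensor:
  """Downsamples spatially, then zero-pads channels up to out_filter."""
  data_format = 'NCHW' if axis == 1 else 'NHWC'
  if strides > 1:
    window = (1, 1, strides, strides) if axis == 1 else (1, strides, strides, 1)
    x = tf.nn.avg_pool(x, ksize=window, strides=window, padding='SAME',
                       data_format=data_format)
  in_filter = x.shape[axis]
  if in_filter < out_filter:
    # Zero-pad the channel axis so residual additions have matching shapes.
    paddings = [[0, 0]] * 4
    paddings[axis] = [(out_filter - in_filter) // 2,
                      (out_filter - in_filter + 1) // 2]
    x = tf.pad(x, paddings)
  return x
```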
......@@ -499,6 +497,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
activation='relu',
se_inner_activation='relu',
se_gating_activation='sigmoid',
se_round_down_protect=True,
expand_se_in_filters=False,
depthwise_activation=None,
use_sync_bn=False,
......@@ -534,6 +533,8 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
se_inner_activation: A `str` name of squeeze-excitation inner activation.
se_gating_activation: A `str` name of squeeze-excitation gating
activation.
se_round_down_protect: A `bool` of whether rounding down by more than 10% is allowed in the SE layer.
expand_se_in_filters: A `bool` of whether or not to expand in_filter in
squeeze and excitation layer.
depthwise_activation: A `str` name of the activation function for
......@@ -575,6 +576,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
self._se_inner_activation = se_inner_activation
self._se_gating_activation = se_gating_activation
self._depthwise_activation = depthwise_activation
self._se_round_down_protect = se_round_down_protect
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
......@@ -654,6 +656,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
out_filters=expand_filters,
se_ratio=self._se_ratio,
divisible_by=self._divisible_by,
round_down_protect=self._se_round_down_protect,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
......@@ -702,6 +705,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
'activation': self._activation,
'se_inner_activation': self._se_inner_activation,
'se_gating_activation': self._se_gating_activation,
'se_round_down_protect': self._se_round_down_protect,
'expand_se_in_filters': self._expand_se_in_filters,
'depthwise_activation': self._depthwise_activation,
'dilation_rate': self._dilation_rate,
......@@ -738,8 +742,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
x = self._conv2(x)
x = self._norm2(x)
if (self._use_residual and self._in_filters == self._out_filters and
self._strides == 1):
if self._stochastic_depth:
x = self._stochastic_depth(x, training=training)
......@@ -859,8 +862,9 @@ class ResidualInner(tf.keras.layers.Layer):
base_config = super(ResidualInner, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self,
inputs: tf.Tensor,
training: Optional[bool] = None) -> tf.Tensor:
x = inputs
if self._batch_norm_first:
x = self._batch_norm_0(x, training=training)
......@@ -993,8 +997,9 @@ class BottleneckResidualInner(tf.keras.layers.Layer):
base_config = super(BottleneckResidualInner, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self,
inputs: tf.Tensor,
training: Optional[bool] = None) -> tf.Tensor:
x = inputs
if self._batch_norm_first:
x = self._batch_norm_0(x, training=training)
......@@ -1063,20 +1068,23 @@ class ReversibleLayer(tf.keras.layers.Layer):
def _ckpt_non_trainable_vars(self):
self._f_non_trainable_vars = [
v.read_value() for v in self._f.non_trainable_variables
]
self._g_non_trainable_vars = [
v.read_value() for v in self._g.non_trainable_variables
]
def _load_ckpt_non_trainable_vars(self):
for v, v_chkpt in zip(self._f.non_trainable_variables,
self._f_non_trainable_vars):
v.assign(v_chkpt)
for v, v_chkpt in zip(self._g.non_trainable_variables,
self._g_non_trainable_vars):
v.assign(v_chkpt)
def call(self,
inputs: tf.Tensor,
training: Optional[bool] = None) -> tf.Tensor:
@tf.custom_gradient
def reversible(
......@@ -1101,12 +1109,12 @@ class ReversibleLayer(tf.keras.layers.Layer):
fwdtape.watch(x)
x1, x2 = tf.split(x, num_or_size_splits=2, axis=self._axis)
f_x2 = self._f(x2, training=training)
x1_down = _maybe_downsample(x1, f_x2.shape[self._axis], self._f.strides,
self._axis)
z1 = f_x2 + x1_down
g_z1 = self._g(z1, training=training)
x2_down = _maybe_downsample(x2, g_z1.shape[self._axis], self._f.strides,
self._axis)
y2 = x2_down + g_z1
# Equation 8: https://arxiv.org/pdf/1707.04585.pdf
......@@ -1114,17 +1122,17 @@ class ReversibleLayer(tf.keras.layers.Layer):
y1 = tf.identity(z1)
y = tf.concat([y1, y2], axis=self._axis)
irreversible = ((self._f.strides != 1 or self._g.strides != 1) or
(y.shape[self._axis] != inputs.shape[self._axis]))
# Checkpointing moving mean/variance for batch normalization layers
# as they shouldn't be updated during the custom gradient pass of f/g.
self._ckpt_non_trainable_vars()
def grad_fn(
dy: tf.Tensor,
variables: Optional[List[tf.Variable]] = None,
) -> Tuple[List[tf.Tensor], List[tf.Tensor]]:
"""Given dy calculate (dy/dx)|_{x_{input}} using f/g."""
if irreversible or not self._manual_grads:
grads_combined = fwdtape.gradient(
......@@ -1158,16 +1166,12 @@ class ReversibleLayer(tf.keras.layers.Layer):
# Compute gradients
g_grads_combined = gtape.gradient(
g_z1, [z1] + self._g.trainable_variables, output_gradients=dy2)
dz1 = dy1 + g_grads_combined[0] # line 5
dwg = g_grads_combined[1:] # line 9
f_grads_combined = ftape.gradient(
f_x2, [x2] + self._f.trainable_variables, output_gradients=dz1)
dx2 = dy2 + f_grads_combined[0] # line 6
dwf = f_grads_combined[1:] # line 8
dx1 = dz1 # line 7
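The manual `grad_fn` above follows the RevNet scheme (Gomez et al., https://arxiv.org/pdf/1707.04585.pdf), which works because a reversible block's inputs can be recomputed from its outputs instead of being stored. A minimal sketch of the coupling and its inverse, assuming strides of 1 and generic callables `f` and `g`:

```python
# Forward (as in the call above):  z1 = x1 + f(x2);  y1 = z1;  y2 = x2 + g(z1)
# Inverse, used to reconstruct activations during the backward pass:
def reversible_inverse(y1, y2, f, g):
  z1 = y1
  x2 = y2 - g(z1)   # undo the second coupling
  x1 = z1 - f(x2)   # undo the first coupling
  return x1, x2
```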
......@@ -1263,10 +1267,8 @@ class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
'filters': self._filters,
'strides': self._strides,
'regularize_depthwise': self._regularize_depthwise,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
......@@ -1314,3 +1316,196 @@ class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
x = self._conv1(x)
x = self._norm1(x)
return self._activation_fn(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class TuckerConvBlock(tf.keras.layers.Layer):
"""An Tucker block (generalized bottleneck)."""
def __init__(self,
in_filters,
out_filters,
input_compression_ratio,
output_compression_ratio,
strides,
kernel_size=3,
stochastic_depth_drop_rate=None,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
divisible_by=1,
use_residual=True,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""Initializes an inverted bottleneck block with BN after convolutions.
Args:
in_filters: An `int` number of filters of the input tensor.
out_filters: An `int` number of filters of the output tensor.
input_compression_ratio: A `float` of the compression ratio for the
input filters.
output_compression_ratio: A `float` of the compression ratio for the
output filters.
strides: An `int` block stride. If greater than 1, this block will
ultimately downsample the input.
kernel_size: An `int` kernel_size of the depthwise conv layer.
stochastic_depth_drop_rate: A `float` or None. If not None, the drop rate
for the stochastic depth layer.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Defaults to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Defaults to None.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
divisible_by: An `int` that ensures all inner dimensions are divisible by
this number.
use_residual: A `bool` of whether to include residual connection between
input and output.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
super(TuckerConvBlock, self).__init__(**kwargs)
self._in_filters = in_filters
self._out_filters = out_filters
self._input_compression_ratio = input_compression_ratio
self._output_compression_ratio = output_compression_ratio
self._strides = strides
self._kernel_size = kernel_size
self._divisible_by = divisible_by
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._use_sync_bn = use_sync_bn
self._use_residual = use_residual
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
def build(self, input_shape):
input_compressed_filters = nn_layers.make_divisible(
value=self._in_filters * self._input_compression_ratio,
divisor=self._divisible_by,
round_down_protect=False)
self._conv0 = tf.keras.layers.Conv2D(
filters=input_compressed_filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._activation_layer0 = tf_utils.get_activation(
self._activation, use_keras_layer=True)
output_compressed_filters = nn_layers.make_divisible(
value=self._out_filters * self._output_compression_ratio,
divisor=self._divisible_by,
round_down_protect=False)
self._conv1 = tf.keras.layers.Conv2D(
filters=output_compressed_filters,
kernel_size=self._kernel_size,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._activation_layer1 = tf_utils.get_activation(
self._activation, use_keras_layer=True)
# Last 1x1 conv.
self._conv2 = tf.keras.layers.Conv2D(
filters=self._out_filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
if self._stochastic_depth_drop_rate:
self._stochastic_depth = nn_layers.StochasticDepth(
self._stochastic_depth_drop_rate)
else:
self._stochastic_depth = None
self._add = tf.keras.layers.Add()
super(TuckerConvBlock, self).build(input_shape)
def get_config(self):
config = {
'in_filters': self._in_filters,
'out_filters': self._out_filters,
'input_compression_ratio': self._input_compression_ratio,
'output_compression_ratio': self._output_compression_ratio,
'strides': self._strides,
'kernel_size': self._kernel_size,
'divisible_by': self._divisible_by,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'use_residual': self._use_residual,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(TuckerConvBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs, training=None):
shortcut = inputs
x = self._conv0(inputs)
x = self._norm0(x)
x = self._activation_layer0(x)
x = self._conv1(x)
x = self._norm1(x)
x = self._activation_layer1(x)
x = self._conv2(x)
x = self._norm2(x)
if (self._use_residual and
self._in_filters == self._out_filters and
self._strides == 1):
if self._stochastic_depth:
x = self._stochastic_depth(x, training=training)
x = self._add([x, shortcut])
return x
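A quick usage sketch of the new block, mirroring the values used in the test added below; the inner 1x1/KxK widths come from `make_divisible(in_filters * input_compression_ratio, ...)` and `make_divisible(out_filters * output_compression_ratio, ...)` (import path follows the test file in this change):

```python
import tensorflow as tf
from official.vision.beta.modeling.layers import nn_blocks

block = nn_blocks.TuckerConvBlock(
    in_filters=24, out_filters=24,
    input_compression_ratio=0.25, output_compression_ratio=0.25,
    strides=1)
# Inner widths: 24 * 0.25 = 6 -> 3x3 conv at width 6 -> 1x1 back to 24.
# With in_filters == out_filters and strides == 1, the residual add applies.
y = block(tf.ones([1, 128, 128, 24]))
assert y.shape.as_list() == [1, 128, 128, 24]
```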
......@@ -32,8 +32,7 @@ def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
strategy_combinations.default_strategy,
strategy_combinations.cloud_tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],)
class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
......@@ -92,9 +91,9 @@ class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
(nn_blocks.InvertedBottleneckBlock, 1, 1, 0.2, None),
(nn_blocks.InvertedBottleneckBlock, 1, 1, None, 0.2),
)
def test_invertedbottleneck_block_creation(self, block_fn, expand_ratio,
strides, se_ratio,
stochastic_depth_drop_rate):
input_size = 128
in_filters = 24
out_filters = 40
......@@ -114,6 +113,31 @@ class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
[1, input_size // strides, input_size // strides, out_filters],
features.shape.as_list())
@parameterized.parameters(
(nn_blocks.TuckerConvBlock, 1, 0.25, 0.25),
(nn_blocks.TuckerConvBlock, 2, 0.25, 0.25),
)
def test_tucker_conv_block(
self, block_fn, strides,
input_compression_ratio, output_compression_ratio):
input_size = 128
in_filters = 24
out_filters = 24
inputs = tf.keras.Input(
shape=(input_size, input_size, in_filters), batch_size=1)
block = block_fn(
in_filters=in_filters,
out_filters=out_filters,
input_compression_ratio=input_compression_ratio,
output_compression_ratio=output_compression_ratio,
strides=strides)
features = block(inputs)
self.assertAllEqual(
[1, input_size // strides, input_size // strides, out_filters],
features.shape.as_list())
class ResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
......@@ -149,6 +173,32 @@ class BottleneckResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
self.assertEqual(expected_output_shape, output.shape.as_list())
class DepthwiseSeparableConvBlockTest(parameterized.TestCase, tf.test.TestCase):
@combinations.generate(distribution_strategy_combinations())
def test_shape(self, distribution):
batch_size, height, width, num_channels = 8, 32, 32, 32
num_filters = 64
strides = 2
input_tensor = tf.random.normal(
shape=[batch_size, height, width, num_channels])
with distribution.scope():
block = nn_blocks.DepthwiseSeparableConvBlock(
num_filters, strides=strides)
config_dict = block.get_config()
recreate_block = nn_blocks.DepthwiseSeparableConvBlock(**config_dict)
output_tensor = block(input_tensor)
expected_output_shape = [
batch_size, height // strides, width // strides, num_filters
]
self.assertEqual(output_tensor.shape.as_list(), expected_output_shape)
output_tensor = recreate_block(input_tensor)
self.assertEqual(output_tensor.shape.as_list(), expected_output_shape)
class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
@combinations.generate(distribution_strategy_combinations())
......@@ -160,13 +210,9 @@ class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
with distribution.scope():
f = nn_blocks.ResidualInner(
filters=filters // 2, strides=strides, batch_norm_first=True)
g = nn_blocks.ResidualInner(
filters=filters // 2, strides=1, batch_norm_first=True)
test_layer = nn_blocks.ReversibleLayer(f, g)
test_layer.build(input_tensor.shape)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
......@@ -199,13 +245,9 @@ class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
with distribution.scope():
f = nn_blocks.ResidualInner(
filters=filters // 2, strides=strides, batch_norm_first=False)
g = nn_blocks.ResidualInner(
filters=filters // 2, strides=1, batch_norm_first=False)
test_layer = nn_blocks.ReversibleLayer(f, g)
test_layer(input_tensor, training=False) # init weights
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
......@@ -247,24 +289,16 @@ class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
input_tensor = tf.random.uniform(shape=[bsz, h, w, c * 4]) # bottleneck
with distribution.scope():
f_manual = nn_blocks.BottleneckResidualInner(
filters=filters // 2, strides=strides, batch_norm_first=False)
g_manual = nn_blocks.BottleneckResidualInner(
filters=filters // 2, strides=1, batch_norm_first=False)
manual_grad_layer = nn_blocks.ReversibleLayer(f_manual, g_manual)
manual_grad_layer(input_tensor, training=False) # init weights
f_auto = nn_blocks.BottleneckResidualInner(
filters=filters // 2, strides=strides, batch_norm_first=False)
g_auto = nn_blocks.BottleneckResidualInner(
filters=filters // 2, strides=1, batch_norm_first=False)
auto_grad_layer = nn_blocks.ReversibleLayer(
f_auto, g_auto, manual_grads=False)
auto_grad_layer(input_tensor) # init weights
......@@ -294,12 +328,12 @@ class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
self.assertAllClose(
distribution.experimental_local_results(manual_grad),
distribution.experimental_local_results(auto_grad),
atol=5e-3,
rtol=5e-3)
# Verify that BN moving mean and variance is correct.
for manual_var, auto_var in zip(manual_grad_layer.non_trainable_variables,
auto_grad_layer.non_trainable_variables):
self.assertAllClose(manual_var, auto_var)
......
......@@ -30,7 +30,8 @@ Activation = Union[str, Callable]
def make_divisible(value: float,
divisor: int,
min_value: Optional[float] = None,
round_down_protect: bool = True,
) -> int:
"""This is to ensure that all layers have channels that are divisible by 8.
......@@ -38,6 +39,8 @@ def make_divisible(value: float,
value: A `float` of original value.
divisor: An `int` of the divisor that need to be checked upon.
min_value: A `float` of minimum value threshold.
round_down_protect: A `bool` indicating whether rounding down by more than 10% is allowed.
Returns:
The adjusted value in `int` that is divisible against divisor.
......@@ -46,7 +49,7 @@ def make_divisible(value: float,
min_value = divisor
new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if round_down_protect and new_value < 0.9 * value:
new_value += divisor
return int(new_value)
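To make the new `round_down_protect` flag concrete, a small worked example against the rounding arithmetic above (values chosen purely for illustration; the import path follows the test file in this change):

```python
from official.vision.beta.modeling.layers.nn_layers import make_divisible

# value=27, divisor=8: int(27 + 8/2) // 8 * 8 = 24, which is a >10%
# round-down since 24 < 0.9 * 27 = 24.3, so protection adds one divisor.
assert make_divisible(27, 8) == 32
assert make_divisible(27, 8, round_down_protect=False) == 24
```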
......@@ -55,7 +58,8 @@ def round_filters(filters: int,
multiplier: float,
divisor: int = 8,
min_depth: Optional[int] = None,
round_down_protect: bool = True,
skip: bool = False) -> int:
"""Rounds number of filters based on width multiplier."""
orig_f = filters
if skip or not multiplier:
......@@ -63,7 +67,8 @@ def round_filters(filters: int,
new_filters = make_divisible(value=filters * multiplier,
divisor=divisor,
min_value=min_depth,
round_down_protect=round_down_protect)
logging.info('round_filter input=%s output=%s', orig_f, new_filters)
return int(new_filters)
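A companion sketch for `round_filters`, showing why threading `round_down_protect` through matters at small widths (again, illustrative values):

```python
from official.vision.beta.modeling.layers.nn_layers import round_filters

# 24 * 0.75 = 18 rounds to 16, a >10% shrink, so protection bumps it to 24.
assert round_filters(24, multiplier=0.75) == 24
assert round_filters(24, multiplier=0.75, round_down_protect=False) == 16
```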
......@@ -80,39 +85,6 @@ def get_padding_for_kernel_size(kernel_size):
kernel_size))
def hard_swish(x: tf.Tensor) -> tf.Tensor:
"""A Swish6/H-Swish activation function.
Reference: Section 5.2 of Howard et al. "Searching for MobileNet V3."
https://arxiv.org/pdf/1905.02244.pdf
Args:
x: the input tensor.
Returns:
The activation output.
"""
return x * tf.nn.relu6(x + 3.) * (1. / 6.)
tf.keras.utils.get_custom_objects().update({'hard_swish': hard_swish})
def simple_swish(x: tf.Tensor) -> tf.Tensor:
"""A swish/silu activation function without custom gradients.
Useful for exporting to SavedModel to avoid custom gradient warnings.
Args:
x: the input tensor.
Returns:
The activation output.
"""
return x * tf.math.sigmoid(x)
tf.keras.utils.get_custom_objects().update({'simple_swish': simple_swish})
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
"""Creates a squeeze and excitation layer."""
......@@ -128,6 +100,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
bias_regularizer=None,
activation='relu',
gating_activation='sigmoid',
round_down_protect=True,
**kwargs):
"""Initializes a squeeze and excitation layer.
......@@ -148,6 +121,8 @@ class SqueezeExcitation(tf.keras.layers.Layer):
activation: A `str` name of the activation function.
gating_activation: A `str` name of the activation function for final
gating function.
round_down_protect: A `bool` of whether rounding down by more than 10% is allowed.
**kwargs: Additional keyword arguments to be passed.
"""
super(SqueezeExcitation, self).__init__(**kwargs)
......@@ -156,6 +131,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
self._out_filters = out_filters
self._se_ratio = se_ratio
self._divisible_by = divisible_by
self._round_down_protect = round_down_protect
self._use_3d_input = use_3d_input
self._activation = activation
self._gating_activation = gating_activation
......@@ -178,7 +154,8 @@ class SqueezeExcitation(tf.keras.layers.Layer):
def build(self, input_shape):
num_reduced_filters = make_divisible(
max(1, int(self._in_filters * self._se_ratio)),
divisor=self._divisible_by,
round_down_protect=self._round_down_protect)
self._se_reduce = tf.keras.layers.Conv2D(
filters=num_reduced_filters,
......@@ -214,6 +191,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'gating_activation': self._gating_activation,
'round_down_protect': self._round_down_protect,
}
base_config = super(SqueezeExcitation, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
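The same flag reaches the SE layer through `build` above, where it sizes the squeeze projection. A small numeric sketch of that computation (illustrative values, not defaults from any config in this change):

```python
from official.vision.beta.modeling.layers.nn_layers import make_divisible

# num_reduced_filters = make_divisible(max(1, int(in_filters * se_ratio)),
#                                      divisor=divisible_by,
#                                      round_down_protect=...)
# With in_filters=108, se_ratio=0.25, divisible_by=8: max(1, int(27.0)) = 27,
# and the flag then decides between 32 (protected) and 24 (not protected).
assert make_divisible(27, 8, round_down_protect=True) == 32
assert make_divisible(27, 8, round_down_protect=False) == 24
```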
......@@ -1239,7 +1217,7 @@ class SpatialPyramidPooling(tf.keras.layers.Layer):
self.aspp_layers.append(pooling + [conv2, norm2])
self._resizing_layer = tf.keras.layers.Resizing(
height, width, interpolation=self._interpolation, dtype=tf.float32)
self._projection = [
......@@ -1272,7 +1250,7 @@ class SpatialPyramidPooling(tf.keras.layers.Layer):
# Apply resize layer to the end of the last set of layers.
if i == len(self.aspp_layers) - 1:
x = self._resizing_layer(x)
result.append(tf.cast(x, inputs.dtype))
x = self._concat_layer(result)
......
......@@ -24,11 +24,6 @@ from official.vision.beta.modeling.layers import nn_layers
class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
def test_hard_swish(self):
activation = tf.keras.layers.Activation('hard_swish')
output = activation(tf.constant([-3, -1.5, 0, 3]))
self.assertAllEqual(output, [0., -0.375, 0., 3.])
def test_scale(self):
scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.))
output = scale(3.)
......
......@@ -14,10 +14,13 @@
"""Contains definitions of ROI sampler."""
# Import libraries
import tensorflow as tf
from official.vision.beta.modeling.layers import box_sampler
from official.vision.beta.ops import box_matcher
from official.vision.beta.ops import iou_similarity
from official.vision.beta.ops import target_gather
@tf.keras.utils.register_keras_serializable(package='Vision')
......@@ -64,14 +67,14 @@ class ROISampler(tf.keras.layers.Layer):
'skip_subsampling': skip_subsampling,
}
self._sim_calc = iou_similarity.IouSimilarity()
self._box_matcher = box_matcher.BoxMatcher(
thresholds=[
background_iou_low_threshold, background_iou_high_threshold,
foreground_iou_threshold
],
indicators=[-3, -1, -2, 1])
self._target_gather = target_gather.TargetGather()
self._sampler = box_sampler.BoxSampler(
num_sampled_rois, foreground_fraction)
......
......@@ -62,7 +62,7 @@ class SegmentationModel(tf.keras.Model):
else:
decoder_features = backbone_features
return self.head((backbone_features, decoder_features))
@property
def checkpoint_items(
......
......@@ -15,12 +15,18 @@
"""Anchor box and labeler definition."""
import collections
# Import libraries
import tensorflow as tf
from official.vision.beta.ops import anchor_generator
from official.vision.beta.ops import box_matcher
from official.vision.beta.ops import iou_similarity
from official.vision.beta.ops import target_gather
from official.vision.utils.object_detection import balanced_positive_negative_sampler
from official.vision.utils.object_detection import box_list
from official.vision.utils.object_detection import faster_rcnn_box_coder
class Anchor(object):
......@@ -132,9 +138,9 @@ class AnchorLabeler(object):
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
"""
self.similarity_calc = iou_similarity.IouSimilarity()
self.target_gather = target_gather.TargetGather()
self.matcher = box_matcher.BoxMatcher(
thresholds=[unmatched_threshold, match_threshold],
indicators=[-1, -2, 1],
force_match_for_each_col=True)
......@@ -343,7 +349,7 @@ def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios,
stride = 2**level
strides[str(level)] = stride
anchor_sizes[str(level)] = anchor_size * stride
anchor_gen = anchor_generator.AnchorGenerator(
anchor_sizes=anchor_sizes,
scales=scales,
aspect_ratios=aspect_ratios,
......
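For orientation, the per-level dicts assembled in the loop above take this shape for levels 3..5 with `anchor_size=4` (a sketch with illustrative values, not taken from any config in this change):

```python
# stride = 2**level; anchor size = anchor_size * stride.
strides = {'3': 8, '4': 16, '5': 32}
anchor_sizes = {'3': 32, '4': 64, '5': 128}
```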
......@@ -16,7 +16,7 @@
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.ops import anchor_generator
class AnchorGeneratorTest(parameterized.TestCase, tf.test.TestCase):
......
......@@ -16,7 +16,6 @@
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import random
......
......@@ -16,7 +16,7 @@
import tensorflow as tf
from official.vision.beta.ops import box_matcher
class BoxMatcherTest(tf.test.TestCase):
......
......@@ -16,7 +16,7 @@
import tensorflow as tf
from official.vision.beta.ops import iou_similarity
class BoxMatcherTest(tf.test.TestCase):
......
......@@ -16,7 +16,7 @@
import tensorflow as tf
from official.vision.beta.ops import target_gather
class TargetGatherTest(tf.test.TestCase):
......
......@@ -884,7 +884,6 @@ class AssembleNet(tf.keras.Model):
inputs=original_inputs, outputs=streams, **kwargs)
@tf.keras.utils.register_keras_serializable(package='Vision')
class AssembleNetModel(tf.keras.Model):
"""An AssembleNet model builder."""
......
......@@ -17,11 +17,10 @@
import dataclasses
import os
from typing import List, Optional, Tuple
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.configs import backbones
......
......@@ -48,7 +48,6 @@ HOURGLASS_SPECS = {
}
@tf.keras.utils.register_keras_serializable(package='centernet')
class Hourglass(tf.keras.Model):
"""CenterNet Hourglass backbone."""
......
......@@ -21,7 +21,6 @@ import tensorflow as tf
from official.vision.beta.projects.centernet.modeling.layers import cn_nn_blocks
@tf.keras.utils.register_keras_serializable(package='centernet')
class CenterNetHead(tf.keras.Model):
"""CenterNet Head."""
......