"vscode:/vscode.git/clone" did not exist on "45aa1584080d98d6d09eefa1ca72b8e420ac5144"
Commit 464bf002 authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 410943892
parent 2a595d51
@@ -98,6 +98,13 @@ class RevNet(hyperparams.Config):
model_id: int = 56
@dataclasses.dataclass
class MobileDet(hyperparams.Config):
"""Mobiledet config."""
model_id: str = 'MobileDetCPU'
filter_size_scale: float = 1.0
@dataclasses.dataclass
class Backbone(hyperparams.OneOfConfig):
"""Configuration for backbones.
@@ -111,6 +118,7 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: spinenet backbone config.
spinenet_mobile: mobile spinenet backbone config.
mobilenet: mobilenet backbone config.
mobiledet: mobiledet backbone config.
"""
type: Optional[str] = None
resnet: ResNet = ResNet()
@@ -120,3 +128,5 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: SpineNet = SpineNet()
spinenet_mobile: SpineNetMobile = SpineNetMobile()
mobilenet: MobileNet = MobileNet()
mobiledet: MobileDet = MobileDet()
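Note on usage: the new `mobiledet` field plugs into the existing `OneOfConfig` selection mechanism, where setting `type='mobiledet'` makes the backbone factory pick the `MobileDet` sub-config. A minimal sketch (module path as in this diff; `get()` assumed to resolve the field named by `type`, as for the other backbones):

from official.vision.beta.configs import backbones as backbones_cfg

# Select the new backbone through the OneOfConfig `type` field.
backbone = backbones_cfg.Backbone(
    type='mobiledet',
    mobiledet=backbones_cfg.MobileDet(
        model_id='MobileDetGPU', filter_size_scale=0.75))

assert backbone.type == 'mobiledet'
# get() returns the sub-config selected by `type`.
assert backbone.get().model_id == 'MobileDetGPU'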
# --experiment_type=retinanet_mobile_coco
# COCO AP 27.0%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
      mobiledet:
model_id: 'MobileDetCPU'
filter_size_scale: 1.0
type: 'mobiledet'
decoder:
type: 'fpn'
fpn:
num_filters: 128
use_separable_conv: true
head:
num_convs: 4
num_filters: 128
use_separable_conv: true
    input_size: [320, 320, 3]
max_level: 6
min_level: 3
norm_activation:
activation: 'relu6'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
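The config above is consumed through the experiment registry. A hedged sketch of loading and overriding it, assuming `retinanet_mobile_coco` is registered with `exp_factory` (as the `--experiment_type` comment at the top of the file suggests):

from official.core import exp_factory

config = exp_factory.get_exp_config('retinanet_mobile_coco')
# Illustrative overrides only, e.g. a smaller batch for local debugging.
config.task.train_data.global_batch_size = 64
config.task.model.backbone.mobiledet.model_id = 'MobileDetCPU'
config.validate()  # re-checks cross-field constraints after overrides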
@@ -16,6 +16,7 @@
"""Backbones package definition."""
from official.vision.beta.modeling.backbones.efficientnet import EfficientNet
from official.vision.beta.modeling.backbones.mobiledet import MobileDet
from official.vision.beta.modeling.backbones.mobilenet import MobileNet
from official.vision.beta.modeling.backbones.resnet import ResNet
from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D
@@ -189,6 +189,40 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase):
norm_momentum=0.99,
norm_epsilon=1e-5)
@combinations.generate(
combinations.combine(
model_id=[
'MobileDetCPU',
'MobileDetDSP',
'MobileDetEdgeTPU',
'MobileDetGPU'],
filter_size_scale=[1.0, 0.75],
))
def test_mobiledet_creation(self, model_id, filter_size_scale):
"""Test creation of Mobiledet models."""
network = backbones.MobileDet(
model_id=model_id,
filter_size_scale=filter_size_scale,
norm_momentum=0.99,
norm_epsilon=1e-5)
backbone_config = backbones_cfg.Backbone(
type='mobiledet',
mobiledet=backbones_cfg.MobileDet(
model_id=model_id, filter_size_scale=filter_size_scale))
norm_activation_config = common_cfg.NormActivation(
norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
factory_network = factory.build_backbone(
input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
backbone_config=backbone_config,
norm_activation_config=norm_activation_config)
network_config = network.get_config()
factory_network_config = factory_network.get_config()
self.assertEqual(network_config, factory_network_config)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Mobiledet."""
import itertools
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.modeling.backbones import mobiledet
class MobileDetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
'MobileDetCPU',
'MobileDetDSP',
'MobileDetEdgeTPU',
'MobileDetGPU',
)
def test_serialize_deserialize(self, model_id):
# Create a network object that sets all of its config options.
kwargs = dict(
model_id=model_id,
filter_size_scale=1.0,
use_sync_bn=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
norm_momentum=0.99,
norm_epsilon=0.001,
min_depth=8,
divisible_by=8,
regularize_depthwise=False,
)
network = mobiledet.MobileDet(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = mobiledet.MobileDet.from_config(network.get_config())
    # Validate that the config can be serialized to JSON.
_ = new_network.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
@parameterized.parameters(
itertools.product(
[1, 3],
[
'MobileDetCPU',
'MobileDetDSP',
'MobileDetEdgeTPU',
'MobileDetGPU',
],
))
def test_input_specs(self, input_dim, model_id):
"""Test different input feature dimensions."""
tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
network = mobiledet.MobileDet(model_id=model_id, input_specs=input_specs)
inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
_ = network(inputs)
@parameterized.parameters(
itertools.product(
[
'MobileDetCPU',
'MobileDetDSP',
'MobileDetEdgeTPU',
'MobileDetGPU',
],
[32, 224],
))
def test_mobiledet_creation(self, model_id, input_size):
"""Test creation of MobileDet family models."""
tf.keras.backend.set_image_data_format('channels_last')
mobiledet_layers = {
        # Expected number of filters at each collected endpoint, for
        # filter_size_scale = 1.0.
'MobileDetCPU': [8, 16, 32, 72, 144],
'MobileDetDSP': [24, 32, 64, 144, 240],
'MobileDetEdgeTPU': [16, 16, 40, 96, 384],
'MobileDetGPU': [16, 32, 64, 128, 384],
}
network = mobiledet.MobileDet(model_id=model_id,
filter_size_scale=1.0)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
for idx, num_filter in enumerate(mobiledet_layers[model_id]):
self.assertAllEqual(
          [1, input_size // 2**(idx+1), input_size // 2**(idx+1), num_filter],
          endpoints[str(idx+1)].shape.as_list())
@@ -497,6 +497,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
activation='relu',
se_inner_activation='relu',
se_gating_activation='sigmoid',
se_round_down_protect=True,
expand_se_in_filters=False,
depthwise_activation=None,
use_sync_bn=False,
@@ -532,6 +533,8 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
se_inner_activation: A `str` name of squeeze-excitation inner activation.
se_gating_activation: A `str` name of squeeze-excitation gating
activation.
      se_round_down_protect: A `bool` of whether rounding down by more than
        10% is allowed in the SE layer.
expand_se_in_filters: A `bool` of whether or not to expand in_filter in
squeeze and excitation layer.
depthwise_activation: A `str` name of the activation function for
@@ -573,6 +576,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
self._se_inner_activation = se_inner_activation
self._se_gating_activation = se_gating_activation
self._depthwise_activation = depthwise_activation
self._se_round_down_protect = se_round_down_protect
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
@@ -652,6 +656,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
out_filters=expand_filters,
se_ratio=self._se_ratio,
divisible_by=self._divisible_by,
round_down_protect=self._se_round_down_protect,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
@@ -700,6 +705,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
'activation': self._activation,
'se_inner_activation': self._se_inner_activation,
'se_gating_activation': self._se_gating_activation,
'se_round_down_protect': self._se_round_down_protect,
'expand_se_in_filters': self._expand_se_in_filters,
'depthwise_activation': self._depthwise_activation,
'dilation_rate': self._dilation_rate,
@@ -1310,3 +1316,196 @@ class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
x = self._conv1(x)
x = self._norm1(x)
return self._activation_fn(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class TuckerConvBlock(tf.keras.layers.Layer):
"""An Tucker block (generalized bottleneck)."""
def __init__(self,
in_filters,
out_filters,
input_compression_ratio,
output_compression_ratio,
strides,
kernel_size=3,
stochastic_depth_drop_rate=None,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
divisible_by=1,
use_residual=True,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""Initializes an inverted bottleneck block with BN after convolutions.
Args:
in_filters: An `int` number of filters of the input tensor.
out_filters: An `int` number of filters of the output tensor.
      input_compression_ratio: A `float` compression ratio for the input
        filters.
      output_compression_ratio: A `float` compression ratio for the output
        filters.
strides: An `int` block stride. If greater than 1, this block will
ultimately downsample the input.
      kernel_size: An `int` kernel_size of the middle conv layer.
      stochastic_depth_drop_rate: A `float` or None. If not None, the drop
        rate for the stochastic depth layer.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
divisible_by: An `int` that ensures all inner dimensions are divisible by
this number.
use_residual: A `bool` of whether to include residual connection between
input and output.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
super(TuckerConvBlock, self).__init__(**kwargs)
self._in_filters = in_filters
self._out_filters = out_filters
self._input_compression_ratio = input_compression_ratio
self._output_compression_ratio = output_compression_ratio
self._strides = strides
self._kernel_size = kernel_size
self._divisible_by = divisible_by
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._use_sync_bn = use_sync_bn
self._use_residual = use_residual
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
def build(self, input_shape):
input_compressed_filters = nn_layers.make_divisible(
value=self._in_filters * self._input_compression_ratio,
divisor=self._divisible_by,
round_down_protect=False)
self._conv0 = tf.keras.layers.Conv2D(
filters=input_compressed_filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._activation_layer0 = tf_utils.get_activation(
self._activation, use_keras_layer=True)
output_compressed_filters = nn_layers.make_divisible(
value=self._out_filters * self._output_compression_ratio,
divisor=self._divisible_by,
round_down_protect=False)
self._conv1 = tf.keras.layers.Conv2D(
filters=output_compressed_filters,
kernel_size=self._kernel_size,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._activation_layer1 = tf_utils.get_activation(
self._activation, use_keras_layer=True)
# Last 1x1 conv.
self._conv2 = tf.keras.layers.Conv2D(
filters=self._out_filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
if self._stochastic_depth_drop_rate:
self._stochastic_depth = nn_layers.StochasticDepth(
self._stochastic_depth_drop_rate)
else:
self._stochastic_depth = None
self._add = tf.keras.layers.Add()
super(TuckerConvBlock, self).build(input_shape)
def get_config(self):
config = {
'in_filters': self._in_filters,
'out_filters': self._out_filters,
'input_compression_ratio': self._input_compression_ratio,
'output_compression_ratio': self._output_compression_ratio,
'strides': self._strides,
'kernel_size': self._kernel_size,
'divisible_by': self._divisible_by,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'use_residual': self._use_residual,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(TuckerConvBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs, training=None):
shortcut = inputs
x = self._conv0(inputs)
x = self._norm0(x)
x = self._activation_layer0(x)
x = self._conv1(x)
x = self._norm1(x)
x = self._activation_layer1(x)
x = self._conv2(x)
x = self._norm2(x)
if (self._use_residual and
self._in_filters == self._out_filters and
self._strides == 1):
if self._stochastic_depth:
x = self._stochastic_depth(x, training=training)
x = self._add([x, shortcut])
return x
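For orientation: the block compresses the input with a 1x1 conv, applies the k x k conv at the block stride, then projects to `out_filters` with a linear 1x1 conv (no activation after `_norm2`); the shortcut is added only when `strides == 1` and the filter counts match. A quick shape sketch with hypothetical values (with `divisible_by=1`, both compressed widths come out to 24 * 0.25 = 6):

import tensorflow as tf
from official.vision.beta.modeling.layers import nn_blocks

block = nn_blocks.TuckerConvBlock(
    in_filters=24, out_filters=24,
    input_compression_ratio=0.25, output_compression_ratio=0.25,
    strides=2, kernel_size=3)
x = tf.keras.Input(shape=(128, 128, 24), batch_size=1)
y = block(x)
print(y.shape)  # (1, 64, 64, 24): strides=2 halves the spatial dims,
                # so the residual add is skipped.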
@@ -113,6 +113,31 @@ class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
[1, input_size // strides, input_size // strides, out_filters],
features.shape.as_list())
@parameterized.parameters(
(nn_blocks.TuckerConvBlock, 1, 0.25, 0.25),
(nn_blocks.TuckerConvBlock, 2, 0.25, 0.25),
)
def test_tucker_conv_block(
self, block_fn, strides,
input_compression_ratio, output_compression_ratio):
input_size = 128
in_filters = 24
out_filters = 24
inputs = tf.keras.Input(
shape=(input_size, input_size, in_filters), batch_size=1)
block = block_fn(
in_filters=in_filters,
out_filters=out_filters,
input_compression_ratio=input_compression_ratio,
output_compression_ratio=output_compression_ratio,
strides=strides)
features = block(inputs)
self.assertAllEqual(
[1, input_size // strides, input_size // strides, out_filters],
features.shape.as_list())
class ResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
@@ -30,7 +30,8 @@ Activation = Union[str, Callable]
def make_divisible(value: float,
divisor: int,
min_value: Optional[float] = None
min_value: Optional[float] = None,
round_down_protect: bool = True,
) -> int:
"""This is to ensure that all layers have channels that are divisible by 8.
@@ -38,6 +39,8 @@ def make_divisible(value: float,
value: A `float` of original value.
    divisor: An `int` divisor that the value needs to be divisible by.
min_value: A `float` of minimum value threshold.
    round_down_protect: A `bool` indicating whether rounding down by more
      than 10% is allowed.
Returns:
    The adjusted value as an `int`, divisible by `divisor`.
@@ -46,7 +49,7 @@ def make_divisible(value: float,
min_value = divisor
new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
  # Make sure that rounding down does not reduce the value by more than 10%.
if new_value < 0.9 * value:
if round_down_protect and new_value < 0.9 * value:
new_value += divisor
return int(new_value)
@@ -55,7 +58,8 @@ def round_filters(filters: int,
multiplier: float,
divisor: int = 8,
min_depth: Optional[int] = None,
skip: bool = False):
round_down_protect: bool = True,
skip: bool = False) -> int:
"""Rounds number of filters based on width multiplier."""
orig_f = filters
if skip or not multiplier:
@@ -63,7 +67,8 @@ def round_filters(filters: int,
new_filters = make_divisible(value=filters * multiplier,
divisor=divisor,
min_value=min_depth)
min_value=min_depth,
round_down_protect=round_down_protect)
logging.info('round_filter input=%s output=%s', orig_f, new_filters)
return int(new_filters)
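A worked example of what the new flag changes, using a standalone copy of the arithmetic above for illustration: with value=10 and divisor=8, nearest rounding gives int(10 + 4) // 8 * 8 = 8, which is below 0.9 * 10 = 9, so the protected path bumps the result to 16, while round_down_protect=False keeps 8.

def make_divisible(value, divisor, min_value=None, round_down_protect=True):
  # Standalone copy of the logic above, for illustration only.
  if min_value is None:
    min_value = divisor
  new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
  if round_down_protect and new_value < 0.9 * value:
    new_value += divisor
  return int(new_value)

assert make_divisible(10, 8) == 16                           # 8 < 9.0, bumped
assert make_divisible(10, 8, round_down_protect=False) == 8  # round-down kept
assert make_divisible(30, 8) == 32                           # no bump needed

`round_filters` simply threads the flag through to `make_divisible`, so callers such as the MobileDet backbone can opt out of the bump when computing scaled filter counts.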
@@ -95,6 +100,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
bias_regularizer=None,
activation='relu',
gating_activation='sigmoid',
round_down_protect=True,
**kwargs):
"""Initializes a squeeze and excitation layer.
@@ -115,6 +121,8 @@ class SqueezeExcitation(tf.keras.layers.Layer):
activation: A `str` name of the activation function.
gating_activation: A `str` name of the activation function for final
gating function.
      round_down_protect: A `bool` of whether rounding down by more than 10%
        is allowed.
**kwargs: Additional keyword arguments to be passed.
"""
super(SqueezeExcitation, self).__init__(**kwargs)
@@ -123,6 +131,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
self._out_filters = out_filters
self._se_ratio = se_ratio
self._divisible_by = divisible_by
self._round_down_protect = round_down_protect
self._use_3d_input = use_3d_input
self._activation = activation
self._gating_activation = gating_activation
@@ -145,7 +154,8 @@ class SqueezeExcitation(tf.keras.layers.Layer):
def build(self, input_shape):
num_reduced_filters = make_divisible(
max(1, int(self._in_filters * self._se_ratio)),
divisor=self._divisible_by)
divisor=self._divisible_by,
round_down_protect=self._round_down_protect)
self._se_reduce = tf.keras.layers.Conv2D(
filters=num_reduced_filters,
@@ -181,6 +191,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'gating_activation': self._gating_activation,
'round_down_protect': self._round_down_protect,
}
base_config = super(SqueezeExcitation, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
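The flag mostly matters for very thin layers: with `in_filters=10`, `se_ratio=1.0`, and `divisible_by=8`, the reduction width is `make_divisible(10, 8, ...)`, i.e. 16 with protection and 8 without. A usage sketch (constructor arguments as in this diff):

from official.vision.beta.modeling.layers import nn_layers

se = nn_layers.SqueezeExcitation(
    in_filters=10, out_filters=10, se_ratio=1.0, divisible_by=8,
    round_down_protect=False)  # reduction conv gets 8 filters instead of 16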