Merge pull request #9317 from tensorflow:mobilenet-beta

PiperOrigin-RevId: 336771016

Merge pull request #9317 from tensorflow:mobilenet-beta
PiperOrigin-RevId: 336771016
caa61e1b · A. Unique TensorFlower · cd4b8c23 · d68c7b96 · caa61e1b · caa61e1b
Commit caa61e1b authored Oct 12, 2020 by A. Unique TensorFlower
19 changed files
--- a/official/modeling/activations/__init__.py
+++ b/official/modeling/activations/__init__.py
@@ -14,6 +14,8 @@
 # ==============================================================================
 """Activations package definition."""
 from official.modeling.activations.gelu import gelu
+from official.modeling.activations.relu import relu6
+from official.modeling.activations.sigmoid import hard_sigmoid
 from official.modeling.activations.swish import hard_swish
 from official.modeling.activations.swish import identity
 from official.modeling.activations.swish import simple_swish
--- a/official/modeling/activations/relu.py
+++ b/official/modeling/activations/relu.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Customized Relu activation."""
+import tensorflow as tf
+@tf.keras.utils.register_keras_serializable(package='Text')
+def relu6(features):
+  """Computes the Relu6 activation function.
+  Args:
+    features: A `Tensor` representing preactivation values.
+  Returns:
+    The activation value.
+  """
+  features = tf.convert_to_tensor(features)
+  return tf.nn.relu6(features)
--- a/official/modeling/activations/relu_test.py
+++ b/official/modeling/activations/relu_test.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the customized Relu activation."""
+import tensorflow as tf
+from tensorflow.python.keras import \
+  keras_parameterized  # pylint: disable=g-direct-tensorflow-import
+from official.modeling import activations
+@keras_parameterized.run_all_keras_modes
+class CustomizedReluTest(keras_parameterized.TestCase):
+  def test_relu6(self):
+    features = [[.25, 0, -.25], [-1, -2, 3]]
+    customized_relu6_data = activations.relu6(features)
+    relu6_data = tf.nn.relu6(features)
+    self.assertAllClose(customized_relu6_data, relu6_data)
+if __name__ == '__main__':
+  tf.test.main()
--- a/official/modeling/activations/sigmoid.py
+++ b/official/modeling/activations/sigmoid.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Customized Sigmoid activation."""
+import tensorflow as tf
+@tf.keras.utils.register_keras_serializable(package='Text')
+def hard_sigmoid(features):
+  """Computes the hard sigmoid activation function.
+  Args:
+    features: A `Tensor` representing preactivation values.
+  Returns:
+    The activation value.
+  """
+  features = tf.convert_to_tensor(features)
+  return tf.nn.relu6(features + tf.constant(3.)) * 0.16667
--- a/official/modeling/activations/sigmoid_test.py
+++ b/official/modeling/activations/sigmoid_test.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the customized Sigmoid activation."""
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.keras import \
+  keras_parameterized  # pylint: disable=g-direct-tensorflow-import
+from official.modeling import activations
+@keras_parameterized.run_all_keras_modes
+class CustomizedSigmoidTest(keras_parameterized.TestCase):
+  def _hard_sigmoid_nn(self, x):
+    x = np.float32(x)
+    return tf.nn.relu6(x + 3.) * 0.16667
+  def test_hard_sigmoid(self):
+    features = [[.25, 0, -.25], [-1, -2, 3]]
+    customized_hard_sigmoid_data = activations.hard_sigmoid(features)
+    sigmoid_data = self._hard_sigmoid_nn(features)
+    self.assertAllClose(customized_hard_sigmoid_data, sigmoid_data)
+if __name__ == '__main__':
+  tf.test.main()
--- a/official/modeling/tf_utils.py
+++ b/official/modeling/tf_utils.py
@@ -104,6 +104,8 @@ def get_activation(identifier):
        "gelu": activations.gelu,
        "simple_swish": activations.simple_swish,
        "hard_swish": activations.hard_swish,
+        "relu6": activations.relu6,
+        "hard_sigmoid": activations.hard_sigmoid,
        "identity": activations.identity,
    }
    identifier = str(identifier).lower()

--- a/official/vision/beta/configs/backbones.py
+++ b/official/vision/beta/configs/backbones.py
@@ -36,6 +36,14 @@ class EfficientNet(hyperparams.Config):
  se_ratio: float = 0.0
+@dataclasses.dataclass
+class MobileNet(hyperparams.Config):
+  """Mobilenet config.
+
+  Options for the MobileNet backbone; exposed as the `mobilenet` field of the
+  `Backbone` one-of config.
+  """
+  # Name of the MobileNet variant, e.g. 'MobileNetV2'.
+  model_id: str = 'MobileNetV2'
+  # Presumably a width multiplier applied to filter counts -- TODO confirm
+  # against the backbone implementation.
+  filter_size_scale: float = 1.0
+  # NOTE(review): assumed to be a stochastic-depth drop probability; the
+  # backbone that consumes it is not visible here.
+  stochastic_depth_drop_rate: float = 0.0
 @dataclasses.dataclass
 class SpineNet(hyperparams.Config):
  """SpineNet config."""
@@ -60,9 +68,11 @@ class Backbone(hyperparams.OneOfConfig):
    revnet: revnet backbone config.
    efficientnet: efficientnet backbone config.
    spinenet: spinenet backbone config.
+    mobilenet: mobilenet backbone config.
  """
  type: Optional[str] = None
  resnet: ResNet = ResNet()
  revnet: RevNet = RevNet()
  efficientnet: EfficientNet = EfficientNet()
  spinenet: SpineNet = SpineNet()
+  mobilenet: MobileNet = MobileNet()
--- a/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml
+++ b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml
+# MobileNetV2_1.0 ImageNet classification. 71.0% top-1 and 90.0% top-5 accuracy.
+runtime:
+  distribution_strategy: 'mirrored'
+  mixed_precision_dtype: 'float16'
+  loss_scale: 'dynamic'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'mobilenet'
+      mobilenet:
+        model_id: 'MobileNetV2'
+        filter_size_scale: 1.0
+    dropout_rate: 0.2
+  losses:
+    l2_weight_decay: 0.00001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 1024  # 128 * 8
+    dtype: 'float16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false  # validation split is not training data (same as the TPU config)
+    global_batch_size: 1024  # 128 * 8
+    dtype: 'float16'
+    drop_remainder: false
+trainer:
+  train_steps: 625500  # 500 epochs
+  validation_steps: 49
+  validation_interval: 1251
+  steps_per_loop: 1251  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 1251
+  checkpoint_interval: 1251
+  optimizer_config:
+    learning_rate:
+      type: 'exponential'
+      exponential:
+        initial_learning_rate: 0.064  # 0.008 * batch_size / 128
+        decay_steps: 3127  # 2.5 * steps_per_epoch
+        decay_rate: 0.96
+        staircase: true
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 6255
--- a/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml
+++ b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml
+# MobileNetV2_1.0 ImageNet classification. 72.72% top-1 and 91.05% top-5 accuracy.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'mobilenet'
+      mobilenet:
+        model_id: 'MobileNetV2'
+        filter_size_scale: 1.0
+    dropout_rate: 0.2
+  losses:
+    l2_weight_decay: 0.00001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 156000  # 500 epochs
+  # Presumably ceil(validation examples / global_batch_size) -- TODO confirm.
+  validation_steps: 13
+  # Validation, summaries and checkpoints all use the steps_per_loop value.
+  validation_interval: 312
+  steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    learning_rate:
+      type: 'exponential'
+      exponential:
+        initial_learning_rate: 0.256  # 0.008 * batch_size / 128
+        decay_steps: 780  # 2.5 * steps_per_epoch
+        decay_rate: 0.96
+        staircase: true
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * steps_per_epoch
--- a/official/vision/beta/configs/image_classification.py
+++ b/official/vision/beta/configs/image_classification.py
@@ -128,10 +128,10 @@ def image_classification_imagenet() -> cfg.ExperimentConfig:
                          80 * steps_per_epoch
                      ],
                      'values': [
-                          0.1 *  train_batch_size / 256,
+                          0.1 * train_batch_size / 256,
-                          0.01 *  train_batch_size / 256,
+                          0.01 * train_batch_size / 256,
-                          0.001 *  train_batch_size / 256,
+                          0.001 * train_batch_size / 256,
-                          0.0001 *  train_batch_size / 256,
+                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
@@ -215,3 +215,75 @@ def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
      ])
  return config
+@exp_factory.register_config_factory('mobilenet_imagenet')
+def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
+  """Image classification on imagenet with mobilenet."""
+  train_batch_size = 4096
+  eval_batch_size = 4096
+  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
+  config = cfg.ExperimentConfig(
+      task=ImageClassificationTask(
+          model=ImageClassificationModel(
+              num_classes=1001,
+              dropout_rate=0.2,
+              input_size=[224, 224, 3],
+              backbone=backbones.Backbone(
+                  type='mobilenet',
+                  mobilenet=backbones.MobileNet(
+                      model_id='MobileNetV2', filter_size_scale=1.0)),
+              norm_activation=common.NormActivation(
+                  norm_momentum=0.997, norm_epsilon=1e-3)),
+          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
+          train_data=DataConfig(
+              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
+              is_training=True,
+              global_batch_size=train_batch_size),
+          validation_data=DataConfig(
+              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
+              is_training=False,
+              global_batch_size=eval_batch_size)),
+      trainer=cfg.TrainerConfig(
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=steps_per_epoch,
+          train_steps=500 * steps_per_epoch,
+          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
+          validation_interval=steps_per_epoch,
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'rmsprop',
+                  'rmsprop': {
+                      'rho': 0.9,
+                      'momentum': 0.9,
+                      'epsilon': 0.002,
+                  }
+              },
+              'learning_rate': {
+                  'type': 'exponential',
+                  'exponential': {
+                      'initial_learning_rate':
+                          0.008 * (train_batch_size // 128),
+                      'decay_steps':
+                          int(2.5 * steps_per_epoch),
+                      'decay_rate':
+                          0.98,
+                      'staircase':
+                          True
+                  }
+              },
+              'warmup': {
+                  'type': 'linear',
+                  'linear': {
+                      'warmup_steps': 5 * steps_per_epoch,
+                      'warmup_learning_rate': 0
+                  }
+              },
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+          'task.validation_data.is_training != None'
+      ])
+  return config
--- a/official/vision/beta/configs/image_classification_test.py
+++ b/official/vision/beta/configs/image_classification_test.py
@@ -28,7 +28,8 @@ from official.vision.beta.configs import image_classification as exp_cfg
 class ImageClassificationConfigTest(tf.test.TestCase, parameterized.TestCase):
  @parameterized.parameters(('resnet_imagenet',),
-                            ('revnet_imagenet',))
+                            ('revnet_imagenet',),
+                            ('mobilenet_imagenet'),)
  def test_image_classification_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)

--- a/official/vision/beta/modeling/backbones/__init__.py
+++ b/official/vision/beta/modeling/backbones/__init__.py
@@ -16,6 +16,7 @@
 """Backbones package definition."""
 from official.vision.beta.modeling.backbones.efficientnet import EfficientNet
+from official.vision.beta.modeling.backbones.mobilenet import MobileNet
 from official.vision.beta.modeling.backbones.resnet import ResNet
 from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D
 from official.vision.beta.modeling.backbones.revnet import RevNet

--- a/official/vision/beta/modeling/backbones/efficientnet.py
+++ b/official/vision/beta/modeling/backbones/efficientnet.py
@@ -16,11 +16,11 @@
 import math
 # Import libraries
-from absl import logging
 import tensorflow as tf
 from official.modeling import tf_utils
 from official.vision.beta.modeling.backbones import factory
 from official.vision.beta.modeling.layers import nn_blocks
+from official.vision.beta.modeling.layers import nn_layers
 layers = tf.keras.layers
@@ -50,22 +50,6 @@ SCALING_MAP = {
 }
-def round_filters(filters, multiplier, divisor=8, min_depth=None, skip=False):
-  """Round number of filters based on depth multiplier."""
-  orig_f = filters
-  if skip or not multiplier:
-    return filters
-  filters *= multiplier
-  min_depth = min_depth or divisor
-  new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
-  # Make sure that round down does not go down by more than 10%.
-  if new_filters < 0.9 * filters:
-    new_filters += divisor
-  logging.info('round_filter input=%s output=%s', orig_f, new_filters)
-  return int(new_filters)
 def round_repeats(repeats, multiplier, skip=False):
  """Round number of filters based on depth multiplier."""
  if skip or not multiplier:
@@ -96,8 +80,8 @@ class BlockSpec(object):
    self.kernel_size = kernel_size
    self.strides = strides
    self.expand_ratio = expand_ratio
-    self.in_filters = round_filters(in_filters, width_scale)
+    self.in_filters = nn_layers.round_filters(in_filters, width_scale)
-    self.out_filters = round_filters(out_filters, width_scale)
+    self.out_filters = nn_layers.round_filters(out_filters, width_scale)
    self.is_output = is_output
@@ -166,7 +150,7 @@ class EfficientNet(tf.keras.Model):
    # Build stem.
    x = layers.Conv2D(
-        filters=round_filters(32, width_scale),
+        filters=nn_layers.round_filters(32, width_scale),
        kernel_size=3,
        strides=2,
        use_bias=False,
@@ -198,7 +182,7 @@ class EfficientNet(tf.keras.Model):
    # Build the final conv for classification.
    x = layers.Conv2D(
-        filters=round_filters(1280, width_scale),
+        filters=nn_layers.round_filters(1280, width_scale),
        kernel_size=1,
        strides=1,
        use_bias=False,

--- a/official/vision/beta/modeling/backbones/factory_test.py
+++ b/official/vision/beta/modeling/backbones/factory_test.py
@@ -86,6 +86,40 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase):
    self.assertEqual(network_config, factory_network_config)
+  @combinations.generate(
+      combinations.combine(
+          model_id=['MobileNetV1', 'MobileNetV2',
+                    'MobileNetV3Large', 'MobileNetV3Small',
+                    'MobileNetV3EdgeTPU'],
+          filter_size_scale=[1.0, 0.75],
+      ))
+  def test_mobilenet_creation(self, model_id, filter_size_scale):
+    """Test creation of Mobilenet models."""
+    network = backbones.MobileNet(
+        model_id=model_id,
+        filter_size_scale=filter_size_scale,
+        norm_momentum=0.99,
+        norm_epsilon=1e-5)
+    backbone_config = backbones_cfg.Backbone(
+        type='mobilenet',
+        mobilenet=backbones_cfg.MobileNet(
+            model_id=model_id, filter_size_scale=filter_size_scale))
+    norm_activation_config = common_cfg.NormActivation(
+        norm_momentum=0.99, norm_epsilon=1e-5)
+    model_config = retinanet_cfg.RetinaNet(
+        backbone=backbone_config, norm_activation=norm_activation_config)
+    factory_network = factory.build_backbone(
+        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
+        model_config=model_config)
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+    self.assertEqual(network_config, factory_network_config)
  @combinations.generate(combinations.combine(model_id=['49'],))
  def test_spinenet_creation(self, model_id):
    """Test creation of SpineNet models."""

--- a/official/vision/beta/modeling/backbones/mobilenet.py
+++ b/official/vision/beta/modeling/backbones/mobilenet.py
--- a/official/vision/beta/modeling/backbones/mobilenet_test.py
+++ b/official/vision/beta/modeling/backbones/mobilenet_test.py
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for MobileNet."""
+import itertools
+# Import libraries
+from absl.testing import parameterized
+import tensorflow as tf
+from official.vision.beta.modeling.backbones import mobilenet
+class MobileNetTest(parameterized.TestCase, tf.test.TestCase):
+  """Tests MobileNet config round trips, endpoint shapes and parameter counts."""
+  @parameterized.parameters('MobileNetV1', 'MobileNetV2',
+                            'MobileNetV3Large', 'MobileNetV3Small',
+                            'MobileNetV3EdgeTPU')
+  def test_serialize_deserialize(self, model_id):
+    """Checks get_config() echoes the kwargs and survives from_config()."""
+    # Create a network object that sets all of its config options.
+    kwargs = dict(
+        model_id=model_id,
+        filter_size_scale=1.0,
+        stochastic_depth_drop_rate=None,
+        use_sync_bn=False,
+        kernel_initializer='VarianceScaling',
+        kernel_regularizer=None,
+        bias_regularizer=None,
+        norm_momentum=0.99,
+        norm_epsilon=0.001,
+        output_stride=None,
+        min_depth=8,
+        divisible_by=8,
+        regularize_depthwise=False,
+        finegrain_classification_mode=True
+    )
+    network = mobilenet.MobileNet(**kwargs)
+    expected_config = dict(kwargs)
+    self.assertEqual(network.get_config(), expected_config)
+    # Create another network object from the first object's config.
+    new_network = mobilenet.MobileNet.from_config(network.get_config())
+    # Validate that the config can be forced to JSON.
+    _ = new_network.to_json()
+    # If the serialization was successful, the new config should match the old.
+    self.assertAllEqual(network.get_config(), new_network.get_config())
+  @parameterized.parameters(
+      itertools.product((1, 3),
+                        ('MobileNetV1', 'MobileNetV2', 'MobileNetV3Large',
+                         'MobileNetV3Small', 'MobileNetV3EdgeTPU')))
+  def test_input_specs(self, input_dim, model_id):
+    """Test different input feature dimensions."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
+    network = mobilenet.MobileNet(model_id=model_id, input_specs=input_specs)
+    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
+    _ = network(inputs)
+  @parameterized.parameters(32, 224)
+  def test_mobilenet_v1_creation(self, input_size):
+    """Test creation of MobileNetV1 and the shapes of endpoints 1-4."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    network = mobilenet.MobileNet(model_id='MobileNetV1',
+                                  filter_size_scale=0.75)
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 24],
+                        endpoints[1].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 48],
+                        endpoints[2].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 96],
+                        endpoints[3].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 96],
+                        endpoints[4].shape.as_list())
+  @parameterized.parameters(32, 224)
+  def test_mobilenet_v2_creation(self, input_size):
+    """Test creation of MobileNetV2 and the shapes of endpoints 1-4."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    network = mobilenet.MobileNet(model_id='MobileNetV2',
+                                  filter_size_scale=1.0)
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 32],
+                        endpoints[1].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
+                        endpoints[2].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
+                        endpoints[3].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
+                        endpoints[4].shape.as_list())
+  @parameterized.parameters(32, 224)
+  def test_mobilenet_v3_small_creation(self, input_size):
+    """Test creation of MobileNetV3Small and the shapes of endpoints 1-4."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    network = mobilenet.MobileNet(model_id='MobileNetV3Small',
+                                  filter_size_scale=0.75)
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
+                        endpoints[1].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 16],
+                        endpoints[2].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 3, input_size / 2 ** 3, 24],
+                        endpoints[3].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 3, input_size / 2 ** 3, 24],
+                        endpoints[4].shape.as_list())
+  @parameterized.parameters(32, 224)
+  def test_mobilenet_v3_large_creation(self, input_size):
+    """Test creation of MobileNetV3Large and the shapes of endpoints 1-4."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    network = mobilenet.MobileNet(model_id='MobileNetV3Large',
+                                  filter_size_scale=0.75)
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
+                        endpoints[1].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
+                        endpoints[2].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
+                        endpoints[3].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
+                        endpoints[4].shape.as_list())
+  @parameterized.parameters(32, 224)
+  def test_mobilenet_v3_edgetpu_creation(self, input_size):
+    """Test creation of MobileNetV3EdgeTPU and the shapes of endpoints 1-4."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    network = mobilenet.MobileNet(model_id='MobileNetV3EdgeTPU',
+                                  filter_size_scale=0.75)
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 24],
+                        endpoints[1].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
+                        endpoints[2].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
+                        endpoints[3].shape.as_list())
+    self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
+                        endpoints[4].shape.as_list())
+  @parameterized.parameters(1.0, 0.75)
+  def test_mobilenet_v1_scaling(self, filter_size_scale):
+    """Checks the MobileNetV1 parameter count at each filter_size_scale."""
+    mobilenet_v1_params = {
+        1.0: 3228864,
+        0.75: 1832976
+    }
+    input_size = 224
+    network = mobilenet.MobileNet(model_id='MobileNetV1',
+                                  filter_size_scale=filter_size_scale)
+    self.assertEqual(network.count_params(),
+                     mobilenet_v1_params[filter_size_scale])
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    _ = network(inputs)
+  @parameterized.parameters(1.0, 0.75)
+  def test_mobilenet_v2_scaling(self, filter_size_scale):
+    """Checks the MobileNetV2 parameter count at each filter_size_scale."""
+    mobilenet_v2_params = {
+        1.0: 2257984,
+        0.75: 1382064
+    }
+    input_size = 224
+    network = mobilenet.MobileNet(model_id='MobileNetV2',
+                                  filter_size_scale=filter_size_scale)
+    self.assertEqual(network.count_params(),
+                     mobilenet_v2_params[filter_size_scale])
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    _ = network(inputs)
+  @parameterized.parameters(1.0, 0.75)
+  def test_mobilenet_v3_large_scaling(self, filter_size_scale):
+    """Checks the MobileNetV3Large parameter count at each filter_size_scale."""
+    mobilenet_v3_large_params = {
+        1.0: 4226432,
+        0.75: 2731616
+    }
+    input_size = 224
+    network = mobilenet.MobileNet(model_id='MobileNetV3Large',
+                                  filter_size_scale=filter_size_scale)
+    self.assertEqual(network.count_params(),
+                     mobilenet_v3_large_params[filter_size_scale])
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    _ = network(inputs)
+  @parameterized.parameters(1.0, 0.75)
+  def test_mobilenet_v3_small_scaling(self, filter_size_scale):
+    """Checks the MobileNetV3Small parameter count at each filter_size_scale."""
+    mobilenet_v3_small_params = {
+        1.0: 1529968,
+        0.75: 1026552
+    }
+    input_size = 224
+    network = mobilenet.MobileNet(model_id='MobileNetV3Small',
+                                  filter_size_scale=filter_size_scale)
+    self.assertEqual(network.count_params(),
+                     mobilenet_v3_small_params[filter_size_scale])
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    _ = network(inputs)
+  @parameterized.parameters(1.0, 0.75)
+  def test_mobilenet_v3_edgetpu_scaling(self, filter_size_scale):
+    """Checks the MobileNetV3EdgeTPU parameter count at each filter_size_scale."""
+    mobilenet_v3_edgetpu_params = {
+        1.0: 2849312,
+        0.75: 1737288
+    }
+    input_size = 224
+    network = mobilenet.MobileNet(model_id='MobileNetV3EdgeTPU',
+                                  filter_size_scale=filter_size_scale)
+    self.assertEqual(network.count_params(),
+                     mobilenet_v3_edgetpu_params[filter_size_scale])
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    _ = network(inputs)
--- a/official/vision/beta/modeling/classification_model_test.py
+++ b/official/vision/beta/modeling/classification_model_test.py
@@ -77,6 +77,52 @@ class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):
    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)
+  @combinations.generate(
+      combinations.combine(
+          mobilenet_model_id=[
+              'MobileNetV1',
+              'MobileNetV2',
+              'MobileNetV3Large',
+              'MobileNetV3Small',
+              'MobileNetV3EdgeTPU'
+          ],
+          filter_size_scale=[1.0, 0.75],
+      ))
+  def test_mobilenet_network_creation(self, mobilenet_model_id,
+                                      filter_size_scale):
+    """Test for creation of a MobileNet classifier."""
+    mobilenet_params = {
+        ('MobileNetV1', 1.0): 4254889,
+        ('MobileNetV1', 0.75): 2602745,
+        ('MobileNetV2', 1.0): 3540265,
+        ('MobileNetV2', 0.75): 2664345,
+        ('MobileNetV3Large', 1.0): 5508713,
+        ('MobileNetV3Large', 0.75): 4013897,
+        ('MobileNetV3Small', 1.0): 2555993,
+        ('MobileNetV3Small', 0.75): 2052577,
+        ('MobileNetV3EdgeTPU', 1.0): 4131593,
+        ('MobileNetV3EdgeTPU', 0.75): 3019569,
+    }
+    inputs = np.random.rand(2, 224, 224, 3)
+    tf.keras.backend.set_image_data_format('channels_last')
+    backbone = backbones.MobileNet(
+        model_id=mobilenet_model_id, filter_size_scale=filter_size_scale)
+    num_classes = 1001
+    model = classification_model.ClassificationModel(
+        backbone=backbone,
+        num_classes=num_classes,
+        dropout_rate=0.2,
+    )
+    self.assertEqual(model.count_params(),
+                     mobilenet_params[(mobilenet_model_id, filter_size_scale)])
+    logits = model(inputs)
+    self.assertAllEqual([2, num_classes], logits.numpy().shape)
  @combinations.generate(
      combinations.combine(
          strategy=[
@@ -129,7 +175,7 @@ class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):
      _ = model(inputs)
  def test_serialize_deserialize(self):
-    """Validate the classification network can be serialized and deserialized."""
+    """Validate the classification net can be serialized and deserialized."""
    tf.keras.backend.set_image_data_format('channels_last')
    backbone = backbones.ResNet(model_id=50)

--- a/official/vision/beta/modeling/layers/nn_blocks.py
+++ b/official/vision/beta/modeling/layers/nn_blocks.py
@@ -14,10 +14,10 @@
 # ==============================================================================
 """Contains common building blocks for neural networks."""
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, Text
 # Import libraries
+from absl import logging
 import tensorflow as tf
 from official.modeling import tf_utils
@@ -391,7 +391,16 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
+               se_inner_activation='relu',
+               se_gating_activation='sigmoid',
+               expand_se_in_filters=False,
+               depthwise_activation=None,
               use_sync_bn=False,
+               dilation_rate=1,
+               divisible_by=1,
+               regularize_depthwise=False,
+               use_depthwise=True,
+               use_residual=True,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
@@ -414,7 +423,24 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
        Default to None.
      activation: `str` name of the activation function.
+      se_inner_activation: Squeeze excitation inner activation.
+      se_gating_activation: Squeeze excitation gating activation.
+      expand_se_in_filters: Whether or not to expand in_filter in squeeze and
+        excitation layer.
+      depthwise_activation: `str` name of the activation function for depthwise
+        only.
      use_sync_bn: if True, use synchronized batch normalization.
+      dilation_rate: `int` an integer specifying the dilation rate to use for
+        dilated convolution. Can be a single integer to specify the same value
+        for all spatial dimensions.
+      divisible_by: `int` ensures all inner dimensions are divisible by this
+        number.
+      regularize_depthwise: `bool` whether or not apply regularization on
+        depthwise.
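+      use_depthwise: `bool` if True, use a depthwise convolution; if False,
+        skip it and use a fused convolution instead (the expansion convolution
+        then takes the block's kernel size and strides).
+      use_residual: `bool` whether to include residual connection between
+        input and output.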
      norm_momentum: `float` normalization omentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
@@ -428,14 +454,23 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
    self._strides = strides
    self._kernel_size = kernel_size
    self._se_ratio = se_ratio
+    self._divisible_by = divisible_by
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
+    self._dilation_rate = dilation_rate
    self._use_sync_bn = use_sync_bn
+    self._regularize_depthwise = regularize_depthwise
+    self._use_depthwise = use_depthwise
+    self._use_residual = use_residual
    self._activation = activation
+    self._se_inner_activation = se_inner_activation
+    self._se_gating_activation = se_gating_activation
+    self._depthwise_activation = depthwise_activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
+    self._expand_se_in_filters = expand_se_in_filters
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
@@ -446,14 +481,30 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)
+    if not depthwise_activation:
+      self._depthwise_activation = activation
+    self._depthwise_activation_fn = tf_utils.get_activation(
+        self._depthwise_activation)
+    if regularize_depthwise:
+      self._depthsize_regularizer = kernel_regularizer
+    else:
+      self._depthsize_regularizer = None
  def build(self, input_shape):
-    if self._expand_ratio != 1:
+    expand_filters = self._in_filters
+    if self._expand_ratio > 1:
      # First 1x1 conv for channel expansion.
+      expand_filters = nn_layers.make_divisible(
+          self._in_filters * self._expand_ratio, self._divisible_by)
+      expand_kernel = 1 if self._use_depthwise else self._kernel_size
+      expand_stride = 1 if self._use_depthwise else self._strides
      self._conv0 = tf.keras.layers.Conv2D(
-          filters=self._in_filters * self._expand_ratio,
+          filters=expand_filters,
-          kernel_size=1,
+          kernel_size=expand_kernel,
-          strides=1,
+          strides=expand_stride,
+          padding='same',
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
@@ -463,29 +514,39 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
-    # Depthwise conv.
+    if self._use_depthwise:
-    self._conv1 = tf.keras.layers.DepthwiseConv2D(
+      # Depthwise conv.
-        kernel_size=(self._kernel_size, self._kernel_size),
+      self._conv1 = tf.keras.layers.DepthwiseConv2D(
-        strides=self._strides,
+          kernel_size=(self._kernel_size, self._kernel_size),
-        padding='same',
+          strides=self._strides,
-        use_bias=False,
+          padding='same',
-        depthwise_initializer=self._kernel_initializer,
+          depth_multiplier=1,
-        depthwise_regularizer=self._kernel_regularizer,
+          dilation_rate=self._dilation_rate,
-        bias_regularizer=self._bias_regularizer)
+          use_bias=False,
-    self._norm1 = self._norm(
+          depthwise_initializer=self._kernel_initializer,
-        axis=self._bn_axis,
+          depthwise_regularizer=self._depthsize_regularizer,
-        momentum=self._norm_momentum,
+          bias_regularizer=self._bias_regularizer)
-        epsilon=self._norm_epsilon)
+      self._norm1 = self._norm(
+          axis=self._bn_axis,
+          momentum=self._norm_momentum,
+          epsilon=self._norm_epsilon)
    # Squeeze and excitation.
-    if self._se_ratio is not None and self._se_ratio > 0 and self._se_ratio <= 1:
+    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
+      logging.info('Use Squeeze and excitation.')
+      in_filters = self._in_filters
+      if self._expand_se_in_filters:
+        in_filters = expand_filters
      self._squeeze_excitation = nn_layers.SqueezeExcitation(
-          in_filters=self._in_filters,
+          in_filters=in_filters,
+          out_filters=expand_filters,
          se_ratio=self._se_ratio,
-          expand_ratio=self._expand_ratio,
+          divisible_by=self._divisible_by,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)
+          bias_regularizer=self._bias_regularizer,
+          activation=self._se_inner_activation,
+          gating_activation=self._se_gating_activation)
    else:
      self._squeeze_excitation = None
@@ -494,6 +555,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
+        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
@@ -519,12 +581,21 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'se_ratio': self._se_ratio,
+        'divisible_by': self._divisible_by,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
+        'se_inner_activation': self._se_inner_activation,
+        'se_gating_activation': self._se_gating_activation,
+        'expand_se_in_filters': self._expand_se_in_filters,
+        'depthwise_activation': self._depthwise_activation,
+        'dilation_rate': self._dilation_rate,
        'use_sync_bn': self._use_sync_bn,
+        'regularize_depthwise': self._regularize_depthwise,
+        'use_depthwise': self._use_depthwise,
+        'use_residual': self._use_residual,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
@@ -533,16 +604,17 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
  def call(self, inputs, training=None):
    shortcut = inputs
-    if self._expand_ratio != 1:
+    if self._expand_ratio > 1:
      x = self._conv0(inputs)
      x = self._norm0(x)
      x = self._activation_fn(x)
    else:
      x = inputs
-    x = self._conv1(x)
+    if self._use_depthwise:
-    x = self._norm1(x)
+      x = self._conv1(x)
-    x = self._activation_fn(x)
+      x = self._norm1(x)
+      x = self._depthwise_activation_fn(x)
    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)
@@ -550,7 +622,9 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
    x = self._conv2(x)
    x = self._norm2(x)
-    if self._in_filters == self._out_filters and self._strides == 1:
+    if (self._use_residual and
+        self._in_filters == self._out_filters and
+        self._strides == 1):
      if self._stochastic_depth:
        x = self._stochastic_depth(x, training=training)
      x = tf.add(x, shortcut)
@@ -570,9 +644,8 @@ class ResidualInner(tf.keras.layers.Layer):
      self,
      filters: int,
      strides: int,
-      kernel_initializer: Union[
+      kernel_initializer: Union[str, Callable[
-          str, Callable[..., tf.keras.initializers.Initializer]]
+          ..., tf.keras.initializers.Initializer]] = 'VarianceScaling',
-      = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
      use_sync_bn: bool = False,
@@ -693,9 +766,8 @@ class BottleneckResidualInner(tf.keras.layers.Layer):
      self,
      filters: int,
      strides: int,
-      kernel_initializer: Union[
+      kernel_initializer: Union[str, Callable[
-          str, Callable[..., tf.keras.initializers.Initializer]]
+          ..., tf.keras.initializers.Initializer]] = 'VarianceScaling',
-      = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
      use_sync_bn: bool = False,
@@ -886,8 +958,10 @@ class ReversibleLayer(tf.keras.layers.Layer):
      self, inputs: tf.Tensor, training: Optional[bool] = None) -> tf.Tensor:
    @tf.custom_gradient
-    def reversible(x: tf.Tensor) -> Tuple[
+    def reversible(
-        tf.Tensor, Callable[[Any], Tuple[List[tf.Tensor], List[tf.Tensor]]]]:
+        x: tf.Tensor
+    ) -> Tuple[tf.Tensor, Callable[[Any], Tuple[List[tf.Tensor],
+                                                List[tf.Tensor]]]]:
      """Implements Algorithm 1 in RevNet paper.
      Paper: https://arxiv.org/pdf/1707.04585.pdf
@@ -926,7 +1000,7 @@ class ReversibleLayer(tf.keras.layers.Layer):
      def grad_fn(dy: tf.Tensor,
                  variables: Optional[List[tf.Variable]] = None,
-                 ) -> Tuple[List[tf.Tensor], List[tf.Tensor]]:
+                  ) -> Tuple[List[tf.Tensor], List[tf.Tensor]]:
        """Given dy calculate (dy/dx)|_{x_{input}} using f/g."""
        if irreversible or not self._manual_grads:
          grads_combined = fwdtape.gradient(
@@ -947,11 +1021,11 @@ class ReversibleLayer(tf.keras.layers.Layer):
          self_to_var_index = [fg_var_refs.index(v.ref()) for v in variables]
          # Algorithm 1 in paper (line # documented in-line)
-          z1 = y1_nograd                                         # line 2
+          z1 = y1_nograd  # line 2
          with tf.GradientTape() as gtape:
            gtape.watch(z1)
            g_z1 = self._g(z1, training=training)
-          x2 = y2_nograd - g_z1                                  # line 3
+          x2 = y2_nograd - g_z1  # line 3
          with tf.GradientTape() as ftape:
            ftape.watch(x2)
@@ -963,16 +1037,16 @@ class ReversibleLayer(tf.keras.layers.Layer):
              g_z1,
              [z1] + self._g.trainable_variables,
              output_gradients=dy2)
-          dz1 = dy1 + g_grads_combined[0]                        # line 5
+          dz1 = dy1 + g_grads_combined[0]  # line 5
-          dwg = g_grads_combined[1:]                             # line 9
+          dwg = g_grads_combined[1:]  # line 9
          f_grads_combined = ftape.gradient(
              f_x2,
              [x2] + self._f.trainable_variables,
              output_gradients=dz1)
-          dx2 = dy2 + f_grads_combined[0]                        # line 6
+          dx2 = dy2 + f_grads_combined[0]  # line 6
-          dwf = f_grads_combined[1:]                             # line 8
+          dwf = f_grads_combined[1:]  # line 8
-          dx1 = dz1                                              # line 7
+          dx1 = dz1  # line 7
          # Pack the input and variable gradients.
          dx = tf.concat([dx1, dx2], axis=self._axis)
@@ -989,3 +1063,130 @@ class ReversibleLayer(tf.keras.layers.Layer):
    activations = reversible(inputs)
    return activations
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
+  """A depthwise separable convolution block with batch normalization.
+  The block applies a depthwise convolution followed by a 1x1 (pointwise)
+  convolution. Each convolution is followed by batch normalization and the
+  configured activation.
+  """
+  def __init__(
+      self,
+      filters: int,
+      kernel_size: int = 3,
+      strides: int = 1,
+      regularize_depthwise=False,
+      activation: Text = 'relu6',
+      kernel_initializer: Text = 'VarianceScaling',
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      dilation_rate: int = 1,
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      **kwargs):
+    """Initializes a depthwise separable convolution block.
+    Args:
+      filters: `int` number of output filters of the 1x1 (pointwise)
+        convolution.
+      kernel_size: `int` an integer specifying the height and width of the
+        depthwise convolution window.
+      strides: `int` stride of the depthwise convolution. If greater than 1,
+        this block will ultimately downsample the input.
+      regularize_depthwise: if True, apply `kernel_regularizer` to the
+        depthwise kernel as well; otherwise the depthwise kernel is not
+        regularized.
+      activation: `str` name of the activation function.
+      kernel_initializer: kernel_initializer for convolutional layers.
+      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
+        Default to None.
+      dilation_rate: an integer or tuple/list of 2 integers, specifying
+        the dilation rate to use for dilated convolution.
+        Can be a single integer to specify the same value for
+        all spatial dimensions.
+      use_sync_bn: if True, use synchronized batch normalization.
+      norm_momentum: `float` normalization momentum for the moving average.
+      norm_epsilon: `float` small float added to variance to avoid dividing by
+        zero.
+      **kwargs: keyword arguments to be passed.
+    """
+    super(DepthwiseSeparableConvBlock, self).__init__(**kwargs)
+    self._filters = filters
+    self._kernel_size = kernel_size
+    self._strides = strides
+    self._activation = activation
+    self._regularize_depthwise = regularize_depthwise
+    self._kernel_initializer = kernel_initializer
+    self._kernel_regularizer = kernel_regularizer
+    self._dilation_rate = dilation_rate
+    self._use_sync_bn = use_sync_bn
+    self._norm_momentum = norm_momentum
+    self._norm_epsilon = norm_epsilon
+    if use_sync_bn:
+      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
+    else:
+      self._norm = tf.keras.layers.BatchNormalization
+    if tf.keras.backend.image_data_format() == 'channels_last':
+      self._bn_axis = -1
+    else:
+      self._bn_axis = 1
+    self._activation_fn = tf_utils.get_activation(activation)
+    # The depthwise kernel is only regularized on request.
+    if regularize_depthwise:
+      self._depthsize_regularizer = kernel_regularizer
+    else:
+      self._depthsize_regularizer = None
+  def get_config(self):
+    """Returns the constructor arguments needed to re-create this block."""
+    # Only attributes that __init__ actually sets are listed here, and every
+    # constructor argument is covered so a config round-trip is lossless.
+    config = {
+        'filters': self._filters,
+        'kernel_size': self._kernel_size,
+        'strides': self._strides,
+        'regularize_depthwise': self._regularize_depthwise,
+        'activation': self._activation,
+        'kernel_initializer': self._kernel_initializer,
+        'kernel_regularizer': self._kernel_regularizer,
+        'dilation_rate': self._dilation_rate,
+        'use_sync_bn': self._use_sync_bn,
+        'norm_momentum': self._norm_momentum,
+        'norm_epsilon': self._norm_epsilon
+    }
+    base_config = super(DepthwiseSeparableConvBlock, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+  def build(self, input_shape):
+    """Creates the depthwise conv, pointwise conv and their norm layers."""
+    # DepthwiseConv2D names its kernel arguments `depthwise_*`; passing
+    # `kernel_*` would not reach the depthwise kernel.
+    self._dwconv0 = tf.keras.layers.DepthwiseConv2D(
+        kernel_size=self._kernel_size,
+        strides=self._strides,
+        padding='same',
+        depth_multiplier=1,
+        dilation_rate=self._dilation_rate,
+        depthwise_initializer=self._kernel_initializer,
+        depthwise_regularizer=self._depthsize_regularizer,
+        use_bias=False)
+    self._norm0 = self._norm(
+        axis=self._bn_axis,
+        momentum=self._norm_momentum,
+        epsilon=self._norm_epsilon)
+    self._conv1 = tf.keras.layers.Conv2D(
+        filters=self._filters,
+        kernel_size=1,
+        strides=1,
+        padding='same',
+        use_bias=False,
+        kernel_initializer=self._kernel_initializer,
+        kernel_regularizer=self._kernel_regularizer)
+    self._norm1 = self._norm(
+        axis=self._bn_axis,
+        momentum=self._norm_momentum,
+        epsilon=self._norm_epsilon)
+    super(DepthwiseSeparableConvBlock, self).build(input_shape)
+  def call(self, inputs, training=None):
+    """Runs depthwise conv -> norm -> act -> pointwise conv -> norm -> act."""
+    x = self._dwconv0(inputs)
+    x = self._norm0(x)
+    x = self._activation_fn(x)
+    x = self._conv1(x)
+    x = self._norm1(x)
+    return self._activation_fn(x)
--- a/official/vision/beta/modeling/layers/nn_layers.py
+++ b/official/vision/beta/modeling/layers/nn_layers.py
@@ -14,46 +14,99 @@
 # ==============================================================================
 """Contains common building blocks for neural networks."""
+from typing import Optional
 # Import libraries
+from absl import logging
 import tensorflow as tf
 from official.modeling import tf_utils
+def make_divisible(value: float,
+                   divisor: int,
+                   min_value: Optional[float] = None
+                   ) -> int:
+  """Rounds `value` to a nearby multiple of `divisor`.
+  The value is rounded to the nearest multiple of `divisor`, clamped from below
+  by `min_value`, and bumped up by one `divisor` if the result ended up more
+  than 10% below the original value.
+  Args:
+    value: `float` original value.
+    divisor: `int` the divisor that need to be checked upon.
+    min_value: `float` minimum value threshold. Defaults to `divisor` when
+      None.
+  Returns:
+    The adjusted value in `int` that divisible against divisor.
+  """
+  lower_bound = divisor if min_value is None else min_value
+  nearest_multiple = (int(value + divisor / 2) // divisor) * divisor
+  adjusted = max(lower_bound, nearest_multiple)
+  # Make sure that round down does not go down by more than 10%.
+  if adjusted < 0.9 * value:
+    adjusted += divisor
+  return adjusted
+def round_filters(filters: int,
+                  multiplier: float,
+                  divisor: int = 8,
+                  min_depth: Optional[int] = None,
+                  skip: bool = False):
+  """Scales a filter count by a width multiplier and rounds the result.
+  Args:
+    filters: `int` original number of filters.
+    multiplier: `float` width multiplier; a falsy value (None or 0) leaves
+      `filters` unchanged.
+    divisor: `int` the scaled filter count is made divisible by this number.
+    min_depth: `int` minimum value threshold forwarded to `make_divisible`.
+    skip: `bool` if True, return `filters` unchanged.
+  Returns:
+    `filters` unchanged when skipped, otherwise the rounded filter count as
+    an `int`.
+  """
+  if skip or not multiplier:
+    return filters
+  scaled_filters = make_divisible(
+      value=filters * multiplier, divisor=divisor, min_value=min_depth)
+  logging.info('round_filter input=%s output=%s', filters, scaled_filters)
+  return int(scaled_filters)
 @tf.keras.utils.register_keras_serializable(package='Vision')
 class SqueezeExcitation(tf.keras.layers.Layer):
  """Squeeze and excitation layer."""
  def __init__(self,
               in_filters,
+               out_filters,
               se_ratio,
-               expand_ratio,
+               divisible_by=1,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
+               gating_activation='sigmoid',
               **kwargs):
    """Implementation for squeeze and excitation.
    Args:
      in_filters: `int` number of filters of the input tensor.
+      out_filters: `int` number of filters of the output tensor.
      se_ratio: `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
-      expand_ratio: `int` expand_ratio for a MBConv block.
+      divisible_by: `int` ensures all inner dimensions are divisible by this
+        number.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
        Default to None.
      activation: `str` name of the activation function.
+      gating_activation: `str` name of the activation function for final gating
+        function.
      **kwargs: keyword arguments to be passed.
    """
    super(SqueezeExcitation, self).__init__(**kwargs)
    self._in_filters = in_filters
+    self._out_filters = out_filters
    self._se_ratio = se_ratio
-    self._expand_ratio = expand_ratio
+    self._divisible_by = divisible_by
    self._activation = activation
+    self._gating_activation = gating_activation
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
@@ -62,9 +115,12 @@ class SqueezeExcitation(tf.keras.layers.Layer):
    else:
      self._spatial_axis = [2, 3]
    self._activation_fn = tf_utils.get_activation(activation)
+    self._gating_activation_fn = tf_utils.get_activation(gating_activation)
  def build(self, input_shape):
-    num_reduced_filters = max(1, int(self._in_filters * self._se_ratio))
+    num_reduced_filters = make_divisible(
+        max(1, int(self._in_filters * self._se_ratio)),
+        divisor=self._divisible_by)
    self._se_reduce = tf.keras.layers.Conv2D(
        filters=num_reduced_filters,
@@ -77,7 +133,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
        bias_regularizer=self._bias_regularizer)
    self._se_expand = tf.keras.layers.Conv2D(
-        filters=self._in_filters * self._expand_ratio,
+        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
@@ -91,22 +147,24 @@ class SqueezeExcitation(tf.keras.layers.Layer):
  def get_config(self):
    config = {
        'in_filters': self._in_filters,
+        'out_filters': self._out_filters,
        'se_ratio': self._se_ratio,
-        'expand_ratio': self._expand_ratio,
+        'divisible_by': self._divisible_by,
        'strides': self._strides,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
+        'gating_activation': self._gating_activation,
    }
    base_config = super(SqueezeExcitation, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
  def call(self, inputs):
    x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True)
-    x = self._se_expand(self._activation_fn(self._se_reduce(x)))
+    x = self._activation_fn(self._se_reduce(x))
+    x = self._gating_activation_fn(self._se_expand(x))
-    return tf.sigmoid(x) * inputs
+    return x * inputs
 @tf.keras.utils.register_keras_serializable(package='Vision')