Unverified Commit e61588cd authored by Shixin, committed by GitHub

[MobileNet] Add Mobilenet Backbone Implementation (#9303)



* factor out the make_divisible function and move round_filters to nn_layers

* modify SqueezeExcitation to add two additional parameters: divisible_by and gating_activation

* modify InvertedBottleneckBlock to add three boolean flags: use_depthwise, use_residual, and regularize_depthwise; add control over the depthwise activation and regularizer; remove expand_ratio from SqueezeExcitation

* add Conv2DBNBlock definition

* add mobilenet v2, v3 implementation

* add mobilenet v1

* put mobilenet_base into class body

* fix a type hint error

* the InvertedBottleneckBlock differs between MobileNet and EfficientNet; made the necessary changes to cope with both

* add target_backbone when calling InvertedBottleneckBlock

* add relu6 and hard_sigmoid

* add test for mobilenet

* add mobilenet to factory

* fix some typos; link the references to the architectures

* remove future import
Co-authored-by: Shixin Luo <luoshixin@google.com>
parent 2f737e1e
......@@ -17,3 +17,5 @@ from official.modeling.activations.gelu import gelu
from official.modeling.activations.swish import hard_swish
from official.modeling.activations.swish import identity
from official.modeling.activations.swish import simple_swish
from official.modeling.activations.relu import relu6
from official.modeling.activations.sigmoid import hard_sigmoid
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Customized Relu activation."""
import tensorflow as tf
@tf.keras.utils.register_keras_serializable(package='Text')
def relu6(features):
"""Computes the Relu6 activation function.
Args:
features: A `Tensor` representing preactivation values.
Returns:
The activation value.
"""
features = tf.convert_to_tensor(features)
return tf.nn.relu6(features)
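# For reference: relu6 clips activations to the range [0, 6], e.g.
# relu6([-1., 3., 8.]) -> [0., 3., 6.].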
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the customized Relu activation."""
import tensorflow as tf
from tensorflow.python.keras import \
keras_parameterized # pylint: disable=g-direct-tensorflow-import
from official.modeling import activations
@keras_parameterized.run_all_keras_modes
class CustomizedReluTest(keras_parameterized.TestCase):
def test_relu6(self):
features = [[.25, 0, -.25], [-1, -2, 3]]
customized_relu6_data = activations.relu6(features)
relu6_data = tf.nn.relu6(features)
self.assertAllClose(customized_relu6_data, relu6_data)
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Customized Sigmoid activation."""
import tensorflow as tf
@tf.keras.utils.register_keras_serializable(package='Text')
def hard_sigmoid(features):
"""Computes the hard sigmoid activation function.
Args:
features: A `Tensor` representing preactivation values.
Returns:
The activation value.
"""
features = tf.convert_to_tensor(features)
return tf.nn.relu6(features + tf.constant(3.)) * 0.16667
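# Note: 0.16667 approximates 1/6, so this is the piecewise-linear hard sigmoid
# clip((x + 3) / 6, 0, 1); e.g. hard_sigmoid(0.) ~= 0.5, and inputs beyond
# +/-3 saturate at 1 and 0 respectively.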
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the customized Sigmoid activation."""
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import \
keras_parameterized # pylint: disable=g-direct-tensorflow-import
from official.modeling import activations
@keras_parameterized.run_all_keras_modes
class CustomizedSigmoidTest(keras_parameterized.TestCase):
def _hard_sigmoid_nn(self, x):
x = np.float32(x)
return tf.nn.relu6(x + 3.) * 0.16667
def test_hard_sigmoid(self):
features = [[.25, 0, -.25], [-1, -2, 3]]
customized_hard_sigmoid_data = activations.hard_sigmoid(features)
sigmoid_data = self._hard_sigmoid_nn(features)
self.assertAllClose(customized_hard_sigmoid_data, sigmoid_data)
if __name__ == '__main__':
tf.test.main()
......@@ -104,6 +104,8 @@ def get_activation(identifier):
"gelu": activations.gelu,
"simple_swish": activations.simple_swish,
"hard_swish": activations.hard_swish,
"relu6": activations.relu6,
"hard_sigmoid": activations.hard_sigmoid,
"identity": activations.identity,
}
identifier = str(identifier).lower()
......
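With these entries registered, the new activations resolve by name through get_activation; a minimal sketch of the lookup (illustrative):
assert tf_utils.get_activation('relu6') is activations.relu6
assert tf_utils.get_activation('hard_sigmoid') is activations.hard_sigmoid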
......@@ -36,6 +36,14 @@ class EfficientNet(hyperparams.Config):
se_ratio: float = 0.0
@dataclasses.dataclass
class MobileNet(hyperparams.Config):
"""Mobilenet config."""
model_id: str = 'MobileNetV2'
width_multiplier: float = 1.0
stochastic_depth_drop_rate: float = 0.0
@dataclasses.dataclass
class SpineNet(hyperparams.Config):
"""SpineNet config."""
......@@ -59,9 +67,11 @@ class Backbone(hyperparams.OneOfConfig):
revnet: revnet backbone config.
efficientnet: efficientnet backbone config.
spinenet: spinenet backbone config.
mobilenet: mobilenet backbone config.
"""
type: Optional[str] = None
resnet: ResNet = ResNet()
revnet: RevNet = RevNet()
efficientnet: EfficientNet = EfficientNet()
spinenet: SpineNet = SpineNet()
mobilenet: MobileNet = MobileNet()
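With the new MobileNet entry in the OneOf config, an experiment can select the backbone by name; a minimal sketch using the fields defined above (illustrative):
backbone = Backbone(
    type='mobilenet',
    mobilenet=MobileNet(model_id='MobileNetV3Large', width_multiplier=0.75))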
......@@ -20,3 +20,4 @@ from official.vision.beta.modeling.backbones.resnet import ResNet
from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D
from official.vision.beta.modeling.backbones.revnet import RevNet
from official.vision.beta.modeling.backbones.spinenet import SpineNet
from official.vision.beta.modeling.backbones.mobilenet import MobileNet
......@@ -20,6 +20,7 @@ from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.modeling.layers import nn_blocks
from official.vision.beta.modeling.layers import nn_layers
layers = tf.keras.layers
......@@ -49,22 +50,6 @@ SCALING_MAP = {
}
def round_repeats(repeats, multiplier, skip=False):
"""Round number of filters based on depth multiplier."""
if skip or not multiplier:
......@@ -95,8 +80,8 @@ class BlockSpec(object):
self.kernel_size = kernel_size
self.strides = strides
self.expand_ratio = expand_ratio
self.in_filters = nn_layers.round_filters(in_filters, width_scale)
self.out_filters = nn_layers.round_filters(out_filters, width_scale)
self.is_output = is_output
......@@ -165,7 +150,7 @@ class EfficientNet(tf.keras.Model):
# Build stem.
x = layers.Conv2D(
filters=nn_layers.round_filters(32, width_scale),
kernel_size=3,
strides=2,
use_bias=False,
......@@ -197,7 +182,7 @@ class EfficientNet(tf.keras.Model):
# Build the final conv for classification.
x = layers.Conv2D(
filters=nn_layers.round_filters(1280, width_scale),
kernel_size=1,
strides=1,
use_bias=False,
......
......@@ -87,6 +87,16 @@ def build_backbone(input_specs: tf.keras.layers.InputSpec,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
elif backbone_type == 'mobilenet':
backbone = backbones.MobileNet(
model_id=backbone_cfg.model_id,
width_multiplier=backbone_cfg.width_multiplier,
input_specs=input_specs,
stochastic_depth_drop_rate=backbone_cfg.stochastic_depth_drop_rate,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
else:
raise ValueError('Backbone {!r} not implemented'.format(backbone_type))
......
......@@ -86,7 +86,41 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase):
self.assertEqual(network_config, factory_network_config)
@combinations.generate(
combinations.combine(
model_id=['MobileNetV1', 'MobileNetV2',
'MobileNetV3Large', 'MobileNetV3Small',
'MobileNetV3EdgeTPU'],
width_multiplier=[1.0, 0.75],
))
def test_mobilenet_creation(self, model_id, width_multiplier):
"""Test creation of Mobilenet models."""
network = backbones.MobileNet(
model_id=model_id,
width_multiplier=width_multiplier,
norm_momentum=0.99,
norm_epsilon=1e-5)
backbone_config = backbones_cfg.Backbone(
type='mobilenet',
mobilenet=backbones_cfg.MobileNet(
model_id=model_id, width_multiplier=width_multiplier))
norm_activation_config = common_cfg.NormActivation(
norm_momentum=0.99, norm_epsilon=1e-5)
model_config = retinanet_cfg.RetinaNet(
backbone=backbone_config, norm_activation=norm_activation_config)
factory_network = factory.build_backbone(
input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
model_config=model_config)
network_config = network.get_config()
factory_network_config = factory_network.get_config()
self.assertEqual(network_config, factory_network_config)
@combinations.generate(combinations.combine(model_id=['49'], ))
def test_spinenet_creation(self, model_id):
"""Test creation of SpineNet models."""
input_size = 128
......
This diff is collapsed.
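The mobilenet.py implementation itself is collapsed in this view. Based on the tests below, a minimal usage sketch of the new backbone (names taken from the tests, not from the collapsed file):
network = mobilenet.MobileNet(model_id='MobileNetV2', width_multiplier=1.0)
inputs = tf.keras.Input(shape=(224, 224, 3), batch_size=1)
endpoints = network(inputs)  # feature maps keyed by downsampling level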
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for MobileNet."""
# Import libraries
from itertools import product
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.modeling.backbones import mobilenet
class MobileNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters('MobileNetV1', 'MobileNetV2',
'MobileNetV3Large', 'MobileNetV3Small',
'MobileNetV3EdgeTPU')
def test_serialize_deserialize(self, model_id):
# Create a network object that sets all of its config options.
kwargs = dict(
model_id=model_id,
width_multiplier=1.0,
stochastic_depth_drop_rate=None,
use_sync_bn=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
norm_momentum=0.99,
norm_epsilon=0.001,
output_stride=None,
min_depth=8,
divisible_by=8,
regularize_depthwise=False,
finegrain_classification_mode=True
)
network = mobilenet.MobileNet(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = mobilenet.MobileNet.from_config(network.get_config())
# Validate that the config can be forced to JSON.
_ = new_network.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
@parameterized.parameters(
product((1, 3),
('MobileNetV1', 'MobileNetV2',
'MobileNetV3Large', 'MobileNetV3Small',
'MobileNetV3EdgeTPU'))
)
def test_input_specs(self, input_dim, model_id):
"""Test different input feature dimensions."""
tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
network = mobilenet.MobileNet(model_id=model_id, input_specs=input_specs)
inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
_ = network(inputs)
@parameterized.parameters(32, 224)
def test_mobilenet_v1_creation(self, input_size):
"""Test creation of EfficientNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
network = mobilenet.MobileNet(model_id='MobileNetV1', width_multiplier=0.75)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 24],
endpoints[1].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 48],
endpoints[2].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 96],
endpoints[3].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 96],
endpoints[4].shape.as_list())
@parameterized.parameters(32, 224)
def test_mobilenet_v2_creation(self, input_size):
"""Test creation of EfficientNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
network = mobilenet.MobileNet(model_id='MobileNetV2', width_multiplier=1.0)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 32],
endpoints[1].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
endpoints[2].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
endpoints[3].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
endpoints[4].shape.as_list())
@parameterized.parameters(32, 224)
def test_mobilenet_v3_small_creation(self, input_size):
"""Test creation of EfficientNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
network = mobilenet.MobileNet(model_id='MobileNetV3Small',
width_multiplier=0.75)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
endpoints[1].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 16],
endpoints[2].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 3, input_size / 2 ** 3, 24],
endpoints[3].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 3, input_size / 2 ** 3, 24],
endpoints[4].shape.as_list())
@parameterized.parameters(32, 224)
def test_mobilenet_v3_large_creation(self, input_size):
"""Test creation of EfficientNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
network = mobilenet.MobileNet(model_id='MobileNetV3Large',
width_multiplier=0.75)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
endpoints[1].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
endpoints[2].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
endpoints[3].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
endpoints[4].shape.as_list())
@parameterized.parameters(32, 224)
def test_mobilenet_v3_edgetpu_creation(self, input_size):
"""Test creation of EfficientNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
network = mobilenet.MobileNet(model_id='MobileNetV3EdgeTPU',
width_multiplier=0.75)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 24],
endpoints[1].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 1, input_size / 2 ** 1, 16],
endpoints[2].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
endpoints[3].shape.as_list())
self.assertAllEqual([1, input_size / 2 ** 2, input_size / 2 ** 2, 24],
endpoints[4].shape.as_list())
@parameterized.parameters(1.0, 0.75)
def test_mobilenet_v1_scaling(self, width_multiplier):
mobilenet_v1_params = {
1.0: 3228864,
0.75: 1832976
}
input_size = 224
network = mobilenet.MobileNet(model_id='MobileNetV1',
width_multiplier=width_multiplier)
self.assertEqual(network.count_params(),
mobilenet_v1_params[width_multiplier])
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
_ = network(inputs)
@parameterized.parameters(1.0, 0.75)
def test_mobilenet_v2_scaling(self, width_multiplier):
mobilenet_v2_params = {
1.0: 2257984,
0.75: 1382064
}
input_size = 224
network = mobilenet.MobileNet(model_id='MobileNetV2',
width_multiplier=width_multiplier)
self.assertEqual(network.count_params(),
mobilenet_v2_params[width_multiplier])
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
_ = network(inputs)
@parameterized.parameters(1.0, 0.75)
def test_mobilenet_v3_large_scaling(self, width_multiplier):
mobilenet_v3_large_params = {
1.0: 4226432,
0.75: 2731616
}
input_size = 224
network = mobilenet.MobileNet(model_id='MobileNetV3Large',
width_multiplier=width_multiplier)
self.assertEqual(network.count_params(),
mobilenet_v3_large_params[width_multiplier])
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
_ = network(inputs)
@parameterized.parameters(1.0, 0.75)
def test_mobilenet_v3_small_scaling(self, width_multiplier):
mobilenet_v3_small_params = {
1.0: 1529968,
0.75: 1026552
}
input_size = 224
network = mobilenet.MobileNet(model_id='MobileNetV3Small',
width_multiplier=width_multiplier)
self.assertEqual(network.count_params(),
mobilenet_v3_small_params[width_multiplier])
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
_ = network(inputs)
@parameterized.parameters(1.0, 0.75)
def test_mobilenet_v3_edgetpu_scaling(self, width_multiplier):
mobilenet_v3_edgetpu_params = {
1.0: 2849312,
0.75: 1737288
}
input_size = 224
network = mobilenet.MobileNet(model_id='MobileNetV3EdgeTPU',
width_multiplier=width_multiplier)
self.assertEqual(network.count_params(),
mobilenet_v3_edgetpu_params[width_multiplier])
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
_ = network(inputs)
......@@ -14,10 +14,10 @@
# ==============================================================================
"""Contains common building blocks for neural networks."""
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, Text
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
......@@ -391,9 +391,18 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
se_inner_activation='relu',
se_gating_activation='sigmoid',
depthwise_activation=None,
use_sync_bn=False,
dilation_rate=1,
divisible_by=1,
regularize_depthwise=False,
use_depthwise=True,
use_residual=True,
norm_momentum=0.99,
norm_epsilon=0.001,
target_backbone='efficientnet',
**kwargs):
"""An inverted bottleneck block with BN after convolutions.
......@@ -414,7 +423,16 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
depthwise_activation: `str` name of the activation function for the
depthwise convolution only.
use_sync_bn: if True, use synchronized batch normalization.
dilation_rate: `int` an integer specifying the dilation rate to use for
dilated convolution. Can be a single integer to specify the same value for
all spatial dimensions.
divisible_by: `int` ensures all inner dimensions are divisible by this
number.
regularize_depthwise: `bool` whether or not to apply regularization on the
depthwise convolution.
use_depthwise: `bool` if True, use a depthwise convolution; if False, use
a fused convolution with the full kernel instead.
use_residual: `bool` whether to include a residual connection between input
and output.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
......@@ -428,15 +446,26 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
self._strides = strides
self._kernel_size = kernel_size
self._se_ratio = se_ratio
self._divisible_by = divisible_by
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._dilation_rate = dilation_rate
self._use_sync_bn = use_sync_bn
self._regularize_depthwise = regularize_depthwise
self._use_depthwise = use_depthwise
self._use_residual = use_residual
self._activation = activation
self._se_inner_activation = se_inner_activation
self._se_gating_activation = se_gating_activation
self._depthwise_activation = depthwise_activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._target_backbone = target_backbone
if target_backbone == 'mobilenet':
self._se_gating_activation = 'hard_sigmoid'
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
......@@ -446,14 +475,32 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
if not depthwise_activation:
self._depthwise_activation = activation
self._depthwise_activation_fn = tf_utils.get_activation(
self._depthwise_activation)
if regularize_depthwise:
self._depthwise_regularizer = kernel_regularizer
else:
self._depthwise_regularizer = None
def build(self, input_shape):
expand_filters = self._in_filters
if self._expand_ratio > 1:
# First 1x1 conv for channel expansion.
expand_filters = nn_layers.make_divisible(
self._in_filters * self._expand_ratio, self._divisible_by)
logging.info('expand_filters: %s, divisible version: %s',
self._in_filters * self._expand_ratio, expand_filters)
expand_kernel = 1 if self._use_depthwise else self._kernel_size
expand_stride = 1 if self._use_depthwise else self._strides
self._conv0 = tf.keras.layers.Conv2D(
filters=expand_filters,
kernel_size=expand_kernel,
strides=expand_stride,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
......@@ -463,14 +510,17 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
if self._use_depthwise:
# Depthwise conv.
self._conv1 = tf.keras.layers.DepthwiseConv2D(
kernel_size=(self._kernel_size, self._kernel_size),
strides=self._strides,
padding='same',
depth_multiplier=1,
dilation_rate=self._dilation_rate,
use_bias=False,
depthwise_initializer=self._kernel_initializer,
depthwise_regularizer=self._depthwise_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
......@@ -479,13 +529,20 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
# Squeeze and excitation.
if self._se_ratio is not None and self._se_ratio > 0 and self._se_ratio <= 1:
logging.info('Use Squeeze and excitation.')
in_filters = self._in_filters
if self._target_backbone == 'mobilenet':
in_filters = expand_filters
self._squeeze_excitation = nn_layers.SqueezeExcitation(
in_filters=in_filters,
out_filters=expand_filters,
se_ratio=self._se_ratio,
divisible_by=self._divisible_by,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
bias_regularizer=self._bias_regularizer,
activation=self._se_inner_activation,
gating_activation=self._se_gating_activation)
else:
self._squeeze_excitation = None
......@@ -494,6 +551,7 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
filters=self._out_filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
......@@ -519,12 +577,20 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
'strides': self._strides,
'kernel_size': self._kernel_size,
'se_ratio': self._se_ratio,
'divisible_by': self._divisible_by,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'se_inner_activation': self._se_inner_activation,
'se_gating_activation': self._se_gating_activation,
'depthwise_activation': self._depthwise_activation,
'dilation_rate': self._dilation_rate,
'use_sync_bn': self._use_sync_bn,
'regularize_depthwise': self._regularize_depthwise,
'use_depthwise': self._use_depthwise,
'use_residual': self._use_residual,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
......@@ -533,16 +599,17 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
def call(self, inputs, training=None):
shortcut = inputs
if self._expand_ratio > 1:
x = self._conv0(inputs)
x = self._norm0(x)
x = self._activation_fn(x)
else:
x = inputs
if self._use_depthwise:
x = self._conv1(x)
x = self._norm1(x)
x = self._depthwise_activation_fn(x)
if self._squeeze_excitation:
x = self._squeeze_excitation(x)
......@@ -550,7 +617,9 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
x = self._conv2(x)
x = self._norm2(x)
if (self._use_residual and
self._in_filters == self._out_filters and
self._strides == 1):
if self._stochastic_depth:
x = self._stochastic_depth(x, training=training)
x = tf.add(x, shortcut)
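# Example (illustrative): a MobileNetV3-style inverted bottleneck built with
# the new flags; target_backbone='mobilenet' switches the SE gating to
# hard_sigmoid.
#   block = InvertedBottleneckBlock(
#       in_filters=16, out_filters=24, expand_ratio=4, strides=2,
#       kernel_size=3, se_ratio=0.25, activation='hard_swish',
#       divisible_by=8, target_backbone='mobilenet')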
......@@ -989,3 +1058,241 @@ class ReversibleLayer(tf.keras.layers.Layer):
activations = reversible(inputs)
return activations
@tf.keras.utils.register_keras_serializable(package='Vision')
class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
"""An depthwise separable convolution block with batch normalization."""
def __init__(self,
filters: int,
kernel_size: int = 3,
strides: int = 1,
regularize_depthwise: bool = False,
activation: Text = 'relu6',
kernel_initializer: Text = 'VarianceScaling',
kernel_regularizer: Optional[
tf.keras.regularizers.Regularizer] = None,
dilation_rate: int = 1,
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
**kwargs):
"""An convolution block with batch normalization.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
kernel_size: `int` an integer specifying the height and width of the
2D convolution window.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
regularize_depthwise: if Ture, apply regularization on depthwise.
activation: `str` name of the activation function.
kernel_size: `int` kernel_size of the conv layer.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
dilation_rate: an integer or tuple/list of 2 integers, specifying
the dilation rate to use for dilated convolution.
Can be a single integer to specify the same value for
all spatial dimensions.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(DepthwiseSeparableConvBlock, self).__init__(**kwargs)
self._filters = filters
self._kernel_size = kernel_size
self._strides = strides
self._activation = activation
self._regularize_depthwise = regularize_depthwise
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._dilation_rate = dilation_rate
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
if regularize_depthwise:
self._depthwise_regularizer = kernel_regularizer
else:
self._depthwise_regularizer = None
def get_config(self):
config = {
'filters': self._filters,
'kernel_size': self._kernel_size,
'strides': self._strides,
'regularize_depthwise': self._regularize_depthwise,
'activation': self._activation,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'dilation_rate': self._dilation_rate,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(DepthwiseSeparableConvBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
self._dwconv0 = tf.keras.layers.DepthwiseConv2D(
kernel_size=self._kernel_size,
strides=self._strides,
padding='same',
depth_multiplier=1,
dilation_rate=self._dilation_rate,
depthwise_initializer=self._kernel_initializer,
depthwise_regularizer=self._depthwise_regularizer,
use_bias=False)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(DepthwiseSeparableConvBlock, self).build(input_shape)
def call(self, inputs, training=None):
x = self._dwconv0(inputs)
x = self._norm0(x)
x = self._activation_fn(x)
x = self._conv1(x)
x = self._norm1(x)
return self._activation_fn(x)
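# Example (illustrative): one MobileNetV1-style unit, a 3x3 depthwise
# convolution followed by a 1x1 pointwise convolution, each with BN and relu6:
#   block = DepthwiseSeparableConvBlock(filters=64, kernel_size=3, strides=1)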
@tf.keras.utils.register_keras_serializable(package='Vision')
class Conv2DBNBlock(tf.keras.layers.Layer):
"""A convolution block with batch normalization."""
def __init__(self,
filters: int,
kernel_size: int = 3,
strides: int = 1,
use_bias: bool = False,
activation: Text = 'relu6',
kernel_initializer: Text = 'VarianceScaling',
kernel_regularizer: Optional[
tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[
tf.keras.regularizers.Regularizer] = None,
use_normalization: bool = True,
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
**kwargs):
"""A convolution block with batch normalization.
Args:
filters: `int` number of filters of the convolution layer.
kernel_size: `int` an integer specifying the height and width of the
2D convolution window.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_bias: if True, use a bias in the convolution layer.
activation: `str` name of the activation function.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
use_normalization: if True, use batch normalization.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(Conv2DBNBlock, self).__init__(**kwargs)
self._filters = filters
self._kernel_size = kernel_size
self._strides = strides
self._activation = activation
self._use_bias = use_bias
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_normalization = use_normalization
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'kernel_size': self._kernel_size,
'use_bias': self._use_bias,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'use_normalization': self._use_normalization,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(Conv2DBNBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
self._conv0 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=self._kernel_size,
strides=self._strides,
padding='same',
use_bias=self._use_bias,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
if self._use_normalization:
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(Conv2DBNBlock, self).build(input_shape)
def call(self, inputs, training=None):
x = self._conv0(inputs)
if self._use_normalization:
x = self._norm0(x)
return self._activation_fn(x)
\ No newline at end of file
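# Example (illustrative): a MobileNet-style stem, a strided 3x3 convolution
# with batch normalization:
#   stem = Conv2DBNBlock(filters=16, kernel_size=3, strides=2,
#                        activation='hard_swish')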
......@@ -15,45 +15,95 @@
"""Contains common building blocks for neural networks."""
# Import libraries
from absl import logging
from typing import Optional
import tensorflow as tf
from official.modeling import tf_utils
def make_divisible(value: float,
divisor: int,
min_value: Optional[float] = None
) -> int:
"""This utility function is to ensure that all layers have a channel number
that is divisible by 8.
Args:
value: `float` original value.
divisor: `int` the divisor that need to be checked upon.
min_value: `float` minimum value threshold.
Returns:
The adjusted value in `int` that divisible against divisor.
"""
if min_value is None:
min_value = divisor
new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_value < 0.9 * value:
new_value += divisor
return new_value
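# For example, make_divisible(11, 8) first rounds to 8; since 8 < 0.9 * 11,
# the guard bumps the result up to 16.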
def round_filters(filters: int,
multiplier: float,
divisor: int = 8,
min_depth: Optional[int] = None,
skip: bool = False) -> int:
"""Round number of filters based on width multiplier."""
orig_f = filters
if skip or not multiplier:
return filters
new_filters = make_divisible(value=filters * multiplier,
divisor=divisor,
min_value=min_depth)
logging.info('round_filter input=%s output=%s', orig_f, new_filters)
return int(new_filters)
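# For example, round_filters(32, 0.75) scales to 24, which is already a
# multiple of 8, so 24 is returned unchanged.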
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
"""Squeeze and excitation layer."""
def __init__(self,
in_filters,
out_filters,
se_ratio,
divisible_by=1,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
gating_activation='sigmoid',
**kwargs):
"""Implementation for squeeze and excitation.
Args:
in_filters: `int` number of filters of the input tensor.
out_filters: `int` number of filters of the output tensor.
se_ratio: `float` or None. If not None, se ratio for the squeeze and
excitation layer.
divisible_by: `int` ensures all inner dimensions are divisible by this number.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
gating_activation: `str` name of the activation function for the final gating.
**kwargs: keyword arguments to be passed.
"""
super(SqueezeExcitation, self).__init__(**kwargs)
self._in_filters = in_filters
self._out_filters = out_filters
self._se_ratio = se_ratio
self._divisible_by = divisible_by
self._activation = activation
self._gating_activation = gating_activation
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
......@@ -62,9 +112,12 @@ class SqueezeExcitation(tf.keras.layers.Layer):
else:
self._spatial_axis = [2, 3]
self._activation_fn = tf_utils.get_activation(activation)
self._gating_activation_fn = tf_utils.get_activation(gating_activation)
def build(self, input_shape):
num_reduced_filters = make_divisible(
max(1, int(self._in_filters * self._se_ratio)),
divisor=self._divisible_by)
self._se_reduce = tf.keras.layers.Conv2D(
filters=num_reduced_filters,
......@@ -77,7 +130,7 @@ class SqueezeExcitation(tf.keras.layers.Layer):
bias_regularizer=self._bias_regularizer)
self._se_expand = tf.keras.layers.Conv2D(
filters=self._out_filters,
kernel_size=1,
strides=1,
padding='same',
......@@ -91,22 +144,24 @@ class SqueezeExcitation(tf.keras.layers.Layer):
def get_config(self):
config = {
'in_filters': self._in_filters,
'out_filters': self._out_filters,
'se_ratio': self._se_ratio,
'divisible_by': self._divisible_by,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'gating_activation': self._gating_activation,
}
base_config = super(SqueezeExcitation, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True)
x = self._activation_fn(self._se_reduce(x))
x = self._gating_activation_fn(self._se_expand(x))
return x * inputs
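# Example (illustrative): an SE block as used inside a MobileNetV3 bottleneck,
# gated with hard_sigmoid rather than sigmoid:
#   se = SqueezeExcitation(in_filters=96, out_filters=96, se_ratio=0.25,
#                          divisible_by=8, gating_activation='hard_sigmoid')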
@tf.keras.utils.register_keras_serializable(package='Vision')
......