Internal change

PiperOrigin-RevId: 425740068

Internal change
PiperOrigin-RevId: 425740068
7785dec0 · Yeqing Li · A. Unique TensorFlower · 9c93f07c · 9c93f07c · 9c93f07c
Commit 7785dec0 authored Feb 01, 2022 by Yeqing Li Committed by A. Unique TensorFlower Feb 01, 2022
20 changed files
--- a/official/vision/modeling/__init__.py
+++ b/official/vision/modeling/__init__.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Modeling package definition."""
-from official.vision.modeling import backbones
-from official.vision.modeling import decoders
-from official.vision.modeling import heads
-from official.vision.modeling import layers
--- a/official/vision/modeling/backbones/__init__.py
+++ b/official/vision/modeling/backbones/__init__.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Backbones package definition."""
-from official.vision.modeling.backbones.efficientnet import EfficientNet
-from official.vision.modeling.backbones.mobiledet import MobileDet
-from official.vision.modeling.backbones.mobilenet import MobileNet
-from official.vision.modeling.backbones.resnet import ResNet
-from official.vision.modeling.backbones.resnet_3d import ResNet3D
-from official.vision.modeling.backbones.resnet_deeplab import DilatedResNet
-from official.vision.modeling.backbones.revnet import RevNet
-from official.vision.modeling.backbones.spinenet import SpineNet
-from official.vision.modeling.backbones.spinenet_mobile import SpineNetMobile
--- a/official/vision/modeling/backbones/efficientnet.py
+++ b/official/vision/modeling/backbones/efficientnet.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Contains definitions of EfficientNet Networks."""
-import math
-from typing import Any, List, Tuple
-# Import libraries
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-layers = tf.keras.layers
-# The fixed EfficientNet-B0 architecture discovered by NAS.
-# Each element represents a specification of a building block:
-# (block_fn, block_repeats, kernel_size, strides, expand_ratio, in_filters,
-# out_filters, is_output)
-# The tuple order matches the leading positional parameters of
-# `BlockSpec.__init__`; `block_spec_decoder` appends `width_scale` and
-# `depth_scale` to each tuple before constructing the `BlockSpec`.
-# Groups with `is_output=True` are recorded as backbone endpoints, numbered
-# from level 2 in the order they appear.
-EN_B0_BLOCK_SPECS = [
-    ('mbconv', 1, 3, 1, 1, 32, 16, False),
-    ('mbconv', 2, 3, 2, 6, 16, 24, True),
-    ('mbconv', 2, 5, 2, 6, 24, 40, True),
-    ('mbconv', 3, 3, 2, 6, 40, 80, False),
-    ('mbconv', 3, 5, 1, 6, 80, 112, True),
-    ('mbconv', 4, 5, 2, 6, 112, 192, False),
-    ('mbconv', 1, 3, 1, 6, 192, 320, True),
-]
-# Per-variant scaling coefficients, keyed by `model_id`. `width_scale` is
-# passed to `nn_layers.round_filters` for filter counts and `depth_scale` is
-# passed to `round_repeats` for the number of block repeats.
-SCALING_MAP = {
-    'b0': dict(width_scale=1.0, depth_scale=1.0),
-    'b1': dict(width_scale=1.0, depth_scale=1.1),
-    'b2': dict(width_scale=1.1, depth_scale=1.2),
-    'b3': dict(width_scale=1.2, depth_scale=1.4),
-    'b4': dict(width_scale=1.4, depth_scale=1.8),
-    'b5': dict(width_scale=1.6, depth_scale=2.2),
-    'b6': dict(width_scale=1.8, depth_scale=2.6),
-    'b7': dict(width_scale=2.0, depth_scale=3.1),
-}
-class BlockSpec():
-  """A container class that specifies one block group of EfficientNet.
-  Repeats and filter counts are stored already scaled: `block_repeats` goes
-  through `round_repeats` with `depth_scale`, and `in_filters`/`out_filters`
-  go through `nn_layers.round_filters` with `width_scale`. All other fields
-  are stored as given.
-  """
-  def __init__(self, block_fn: str, block_repeats: int, kernel_size: int,
-               strides: int, expand_ratio: float, in_filters: int,
-               out_filters: int, is_output: bool, width_scale: float,
-               depth_scale: float):
-    """Initializes a block group specification.
-    Args:
-      block_fn: A `str` name of the block type (e.g. 'mbconv').
-      block_repeats: An `int` of the unscaled number of blocks in the group.
-      kernel_size: An `int` of the convolution kernel size.
-      strides: An `int` of the stride used by the first block of the group.
-      expand_ratio: A `float` of the expansion ratio of the block.
-      in_filters: An `int` of the unscaled number of input filters.
-      out_filters: An `int` of the unscaled number of output filters.
-      is_output: A `bool`, whether the group output is kept as an endpoint.
-      width_scale: A `float` multiplier applied to the filter counts.
-      depth_scale: A `float` multiplier applied to `block_repeats`.
-    """
-    self.block_fn = block_fn
-    self.block_repeats = round_repeats(block_repeats, depth_scale)
-    self.kernel_size = kernel_size
-    self.strides = strides
-    self.expand_ratio = expand_ratio
-    self.in_filters = nn_layers.round_filters(in_filters, width_scale)
-    self.out_filters = nn_layers.round_filters(out_filters, width_scale)
-    self.is_output = is_output
-def round_repeats(repeats: int, multiplier: float, skip: bool = False) -> int:
-  """Returns the number of block repeats scaled by the depth multiplier.
-  Args:
-    repeats: An `int` of the base number of repeats.
-    multiplier: A `float` depth multiplier. A falsy value (e.g. `None` or `0`)
-      leaves `repeats` unchanged.
-    skip: If True, `repeats` is returned unchanged.
-  Returns:
-    `repeats` itself when scaling is skipped, otherwise
-    `ceil(multiplier * repeats)` as an `int`.
-  """
-  if skip or not multiplier:
-    return repeats
-  # Round up so that scaling never removes a block from the group.
-  return int(math.ceil(multiplier * repeats))
-def block_spec_decoder(specs: List[Tuple[Any, ...]], width_scale: float,
-                       depth_scale: float) -> List[BlockSpec]:
-  """Decodes raw spec tuples into scaled `BlockSpec` objects.
-  Args:
-    specs: A list of tuples whose elements are the leading positional
-      arguments of `BlockSpec` (see `EN_B0_BLOCK_SPECS`).
-    width_scale: A `float` width multiplier forwarded to every `BlockSpec`.
-    depth_scale: A `float` depth multiplier forwarded to every `BlockSpec`.
-  Returns:
-    A list of `BlockSpec`, one per input tuple, in the same order.
-  """
-  decoded_specs = []
-  for s in specs:
-    # Append the two scales so the tuple lines up with BlockSpec.__init__.
-    s = s + (
-        width_scale,
-        depth_scale,
-    )
-    decoded_specs.append(BlockSpec(*s))
-  return decoded_specs
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class EfficientNet(tf.keras.Model):
-  """Creates an EfficientNet family model.
-  This implements the EfficientNet model from:
-    Mingxing Tan, Quoc V. Le.
-    EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
-    (https://arxiv.org/pdf/1905.11946)
-  The model output is a dict mapping level strings ('2', '3', ...) to feature
-  tensors: one entry per block group flagged `is_output`, plus one final
-  entry for the last 1x1 conv.
-  """
-  def __init__(self,
-               model_id: str,
-               input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
-                   shape=[None, None, None, 3]),
-               se_ratio: float = 0.0,
-               stochastic_depth_drop_rate: float = 0.0,
-               kernel_initializer: str = 'VarianceScaling',
-               kernel_regularizer: tf.keras.regularizers.Regularizer = None,
-               bias_regularizer: tf.keras.regularizers.Regularizer = None,
-               activation: str = 'relu',
-               use_sync_bn: bool = False,
-               norm_momentum: float = 0.99,
-               norm_epsilon: float = 0.001,  # pytype: disable=annotation-type-mismatch  # typed-keras
-               **kwargs):
-    """Initializes an EfficientNet model.
-    Args:
-      model_id: A `str` of model ID of EfficientNet. Must be a key of
-        `SCALING_MAP`.
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      se_ratio: A `float` of squeeze and excitation ratio for inverted
-        bottleneck blocks.
-      stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer.
-      kernel_initializer: A `str` for kernel initializer of convolutional
-        layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      activation: A `str` of name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    # Keep the constructor arguments; `_block_group` and `get_config` read
-    # them back.
-    self._model_id = model_id
-    self._input_specs = input_specs
-    self._se_ratio = se_ratio
-    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
-    self._use_sync_bn = use_sync_bn
-    self._activation = activation
-    self._kernel_initializer = kernel_initializer
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    # Select the normalization layer class; it is instantiated below for the
-    # stem and for the final conv.
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-    # The normalization axis follows the Keras image data format.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      bn_axis = -1
-    else:
-      bn_axis = 1
-    # Build EfficientNet.
-    # The leading (batch) dimension of the spec shape is dropped.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    # A `model_id` that is not a key of SCALING_MAP raises KeyError here.
-    width_scale = SCALING_MAP[model_id]['width_scale']
-    depth_scale = SCALING_MAP[model_id]['depth_scale']
-    # Build stem.
-    x = layers.Conv2D(
-        filters=nn_layers.round_filters(32, width_scale),
-        kernel_size=3,
-        strides=2,
-        use_bias=False,
-        padding='same',
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)(
-            inputs)
-    x = self._norm(
-        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-            x)
-    x = tf_utils.get_activation(activation)(x)
-    # Build intermediate blocks.
-    # Endpoint levels start at 2 and advance only for groups with is_output.
-    endpoints = {}
-    endpoint_level = 2
-    decoded_specs = block_spec_decoder(EN_B0_BLOCK_SPECS, width_scale,
-                                       depth_scale)
-    for i, specs in enumerate(decoded_specs):
-      x = self._block_group(
-          inputs=x, specs=specs, name='block_group_{}'.format(i))
-      if specs.is_output:
-        endpoints[str(endpoint_level)] = x
-        endpoint_level += 1
-    # Build output specs for downstream tasks.
-    # Captured here, before the final conv endpoint is added below, so
-    # `output_specs` does not contain that last level.
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    # Build the final conv for classification.
-    x = layers.Conv2D(
-        filters=nn_layers.round_filters(1280, width_scale),
-        kernel_size=1,
-        strides=1,
-        use_bias=False,
-        padding='same',
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)(
-            x)
-    x = self._norm(
-        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-            x)
-    endpoints[str(endpoint_level)] = tf_utils.get_activation(activation)(x)
-    # Build the Keras model from the symbolic input and the endpoints dict.
-    super(EfficientNet, self).__init__(
-        inputs=inputs, outputs=endpoints, **kwargs)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   specs: BlockSpec,
-                   name: str = 'block_group'):
-    """Creates one group of blocks for the EfficientNet model.
-    The first block uses `specs.in_filters` and `specs.strides`; the
-    remaining `specs.block_repeats - 1` blocks use `specs.out_filters` as
-    input filters and a stride of 1.
-    Args:
-      inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
-        NOTE(review): the tests in this change build the model with the
-        `channels_last` data format; confirm the documented layout.
-      specs: The specifications for one inverted bottleneck block group.
-      name: A `str` name for the block.
-    Returns:
-      The output `tf.Tensor` of the block layer.
-    Raises:
-      ValueError: If `specs.block_fn` is not 'mbconv'.
-    """
-    if specs.block_fn == 'mbconv':
-      block_fn = nn_blocks.InvertedBottleneckBlock
-    else:
-      raise ValueError('Block func {} not supported.'.format(specs.block_fn))
-    # First block of the group: may change the filter count and the stride.
-    x = block_fn(
-        in_filters=specs.in_filters,
-        out_filters=specs.out_filters,
-        expand_ratio=specs.expand_ratio,
-        strides=specs.strides,
-        kernel_size=specs.kernel_size,
-        se_ratio=self._se_ratio,
-        stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer,
-        activation=self._activation,
-        use_sync_bn=self._use_sync_bn,
-        norm_momentum=self._norm_momentum,
-        norm_epsilon=self._norm_epsilon)(
-            inputs)
-    # Remaining blocks keep the filter count and the resolution.
-    for _ in range(1, specs.block_repeats):
-      x = block_fn(
-          in_filters=specs.out_filters,  # Set 'in_filters' to 'out_filters'.
-          out_filters=specs.out_filters,
-          expand_ratio=specs.expand_ratio,
-          strides=1,  # Fix strides to 1.
-          kernel_size=specs.kernel_size,
-          se_ratio=self._se_ratio,
-          stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer,
-          activation=self._activation,
-          use_sync_bn=self._use_sync_bn,
-          norm_momentum=self._norm_momentum,
-          norm_epsilon=self._norm_epsilon)(
-              x)
-    # The identity op only attaches `name` to the group output.
-    return tf.identity(x, name=name)
-  def get_config(self):
-    """Returns the constructor arguments as a dict.
-    `input_specs` is not part of the returned dict, so a model rebuilt via
-    `from_config` uses the default `input_specs` of `__init__`.
-    """
-    config_dict = {
-        'model_id': self._model_id,
-        'se_ratio': self._se_ratio,
-        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    """Builds a model from a `get_config` dict; `custom_objects` is unused."""
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('efficientnet')
-def build_efficientnet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
-  """Builds EfficientNet backbone from a config.
-  Args:
-    input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-    backbone_config: A backbone config whose `type` must be 'efficientnet';
-      `get()` supplies `model_id`, `stochastic_depth_drop_rate` and
-      `se_ratio`.
-    norm_activation_config: A config supplying `activation`, `use_sync_bn`,
-      `norm_momentum` and `norm_epsilon`.
-    l2_regularizer: A `tf.keras.regularizers.Regularizer` used as the kernel
-      regularizer. Default to None.
-  Returns:
-    An `EfficientNet` instance. `kernel_initializer` and `bias_regularizer`
-    are left at the `EfficientNet` defaults.
-  """
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'efficientnet', (f'Inconsistent backbone type '
-                                           f'{backbone_type}')
-  return EfficientNet(
-      model_id=backbone_cfg.model_id,
-      input_specs=input_specs,
-      stochastic_depth_drop_rate=backbone_cfg.stochastic_depth_drop_rate,
-      se_ratio=backbone_cfg.se_ratio,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
--- a/official/vision/modeling/backbones/efficientnet_test.py
+++ b/official/vision/modeling/backbones/efficientnet_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for EfficientNet."""
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-from official.vision.modeling.backbones import efficientnet
-class EfficientNetTest(parameterized.TestCase, tf.test.TestCase):
-  """Unit tests for the `efficientnet.EfficientNet` backbone."""
-  @parameterized.parameters(32, 224)
-  def test_network_creation(self, input_size):
-    """Test creation of EfficientNet family models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    network = efficientnet.EfficientNet(model_id='b0')
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    # Endpoint 'L' is expected at 1 / 2**L of the input resolution; the
-    # channel counts are the B0 `out_filters` of the groups flagged is_output.
-    self.assertAllEqual([1, input_size / 2**2, input_size / 2**2, 24],
-                        endpoints['2'].shape.as_list())
-    self.assertAllEqual([1, input_size / 2**3, input_size / 2**3, 40],
-                        endpoints['3'].shape.as_list())
-    self.assertAllEqual([1, input_size / 2**4, input_size / 2**4, 112],
-                        endpoints['4'].shape.as_list())
-    self.assertAllEqual([1, input_size / 2**5, input_size / 2**5, 320],
-                        endpoints['5'].shape.as_list())
-  @parameterized.parameters('b0', 'b3', 'b6')
-  def test_network_scaling(self, model_id):
-    """Test compound scaling."""
-    # Expected parameter counts for models built with se_ratio=0.25.
-    efficientnet_params = {
-        'b0': 4049564,
-        'b3': 10783528,
-        'b6': 40960136,
-    }
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_size = 32
-    network = efficientnet.EfficientNet(model_id=model_id, se_ratio=0.25)
-    self.assertEqual(network.count_params(), efficientnet_params[model_id])
-    # Also check that the scaled model can be called on a concrete input.
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    _ = network(inputs)
-  @parameterized.parameters(1, 3)
-  def test_input_specs(self, input_dim):
-    """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
-    network = efficientnet.EfficientNet(model_id='b0', input_specs=input_specs)
-    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
-    _ = network(inputs)
-  def test_serialize_deserialize(self):
-    """Test that `get_config` round-trips through `from_config`."""
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id='b0',
-        se_ratio=0.25,
-        stochastic_depth_drop_rate=None,
-        use_sync_bn=False,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-        bias_regularizer=None,
-        activation='relu',
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-    )
-    network = efficientnet.EfficientNet(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = efficientnet.EfficientNet.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-# Run the test suite when this file is executed as a script.
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/factory.py
+++ b/official/vision/modeling/backbones/factory.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Backbone registers and factory method.
-One can register a new backbone model with the following two steps:
-1 Import the factory and register the builder in the backbone file.
-2 Import the backbone class in backbones/__init__.py.
-```
-# my_backbone.py
-from modeling.backbones import factory
-class MyBackbone():
-  ...
-@factory.register_backbone_builder('my_backbone')
-def build_my_backbone():
-  return MyBackbone()
-# backbones/__init__.py adds import
-from modeling.backbones.my_backbone import MyBackbone
-```
-If one wants the MyBackbone class to be used only by certain binaries,
-then don't import the backbone module in backbones/__init__.py, but import it
-in the place that uses it.
-"""
-from typing import Sequence, Union
-# Import libraries
-import tensorflow as tf
-from official.core import registry
-from official.modeling import hyperparams
-# Registry of backbone builders keyed by backbone type; written through
-# `register_backbone_builder` and read by `build_backbone`.
-_REGISTERED_BACKBONE_CLS = {}
-def register_backbone_builder(key: str):
-  """Decorates a builder of backbone class.
-  The builder should be a Callable (a class or a function).
-  This decorator supports registration of backbone builder as follows:
-  ```
-  class MyBackbone(tf.keras.Model):
-    pass
-  @register_backbone_builder('mybackbone')
-  def builder(input_specs, backbone_config, norm_activation_config,
-              l2_regularizer=None):
-    return MyBackbone(...)
-  # Builds a MyBackbone object, given a config whose type is 'mybackbone'.
-  my_backbone = build_backbone(input_specs, backbone_config,
-                               norm_activation_config, l2_regularizer)
-  ```
-  Args:
-    key: A `str` of key to look up the builder.
-  Returns:
-    The decorator returned by `registry.register` for the backbone registry
-    and `key`; applying it to a builder registers that builder under `key`.
-  """
-  return registry.register(_REGISTERED_BACKBONE_CLS, key)
-def build_backbone(input_specs: Union[tf.keras.layers.InputSpec,
-                                      Sequence[tf.keras.layers.InputSpec]],
-                   backbone_config: hyperparams.Config,
-                   norm_activation_config: hyperparams.Config,
-                   l2_regularizer: tf.keras.regularizers.Regularizer = None,
-                   **kwargs) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
-  """Builds backbone from a config.
-  The builder is looked up in the backbone registry by `backbone_config.type`
-  and called with all arguments passed by keyword.
-  Args:
-    input_specs: A (sequence of) `tf.keras.layers.InputSpec` of input.
-    backbone_config: A `OneOfConfig` of backbone config.
-    norm_activation_config: A config for normalization/activation layer.
-    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
-      None.
-    **kwargs: Additional keyword args to be passed to backbone builder.
-  Returns:
-    A `tf.keras.Model` instance of the backbone.
-  """
-  backbone_builder = registry.lookup(_REGISTERED_BACKBONE_CLS,
-                                     backbone_config.type)
-  # Builders receive keyword arguments only, so their parameter names must
-  # match the names used here.
-  return backbone_builder(
-      input_specs=input_specs,
-      backbone_config=backbone_config,
-      norm_activation_config=norm_activation_config,
-      l2_regularizer=l2_regularizer,
-      **kwargs)
--- a/official/vision/modeling/backbones/factory_test.py
+++ b/official/vision/modeling/backbones/factory_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for factory functions."""
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-from tensorflow.python.distribute import combinations
-from official.vision.configs import backbones as backbones_cfg
-from official.vision.configs import backbones_3d as backbones_3d_cfg
-from official.vision.configs import common as common_cfg
-from official.vision.modeling import backbones
-from official.vision.modeling.backbones import factory
-class FactoryTest(tf.test.TestCase, parameterized.TestCase):
-  """Tests that factory-built backbones match directly constructed ones.
-  Most tests build the same backbone twice, once through its class and once
-  through `factory.build_backbone`, and compare the two `get_config()` dicts.
-  """
-  @combinations.generate(
-      combinations.combine(model_id=[18, 34, 50, 101, 152],))
-  def test_resnet_creation(self, model_id):
-    """Test creation of ResNet models."""
-    network = backbones.ResNet(
-        model_id=model_id, se_ratio=0.0, norm_momentum=0.99, norm_epsilon=1e-5)
-    backbone_config = backbones_cfg.Backbone(
-        type='resnet',
-        resnet=backbones_cfg.ResNet(model_id=model_id, se_ratio=0.0))
-    norm_activation_config = common_cfg.NormActivation(
-        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
-    factory_network = factory.build_backbone(
-        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
-        backbone_config=backbone_config,
-        norm_activation_config=norm_activation_config)
-    network_config = network.get_config()
-    factory_network_config = factory_network.get_config()
-    self.assertEqual(network_config, factory_network_config)
-  @combinations.generate(
-      combinations.combine(
-          model_id=['b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
-          se_ratio=[0.0, 0.25],
-      ))
-  def test_efficientnet_creation(self, model_id, se_ratio):
-    """Test creation of EfficientNet models."""
-    network = backbones.EfficientNet(
-        model_id=model_id,
-        se_ratio=se_ratio,
-        norm_momentum=0.99,
-        norm_epsilon=1e-5)
-    backbone_config = backbones_cfg.Backbone(
-        type='efficientnet',
-        efficientnet=backbones_cfg.EfficientNet(
-            model_id=model_id, se_ratio=se_ratio))
-    norm_activation_config = common_cfg.NormActivation(
-        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
-    factory_network = factory.build_backbone(
-        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
-        backbone_config=backbone_config,
-        norm_activation_config=norm_activation_config)
-    network_config = network.get_config()
-    factory_network_config = factory_network.get_config()
-    self.assertEqual(network_config, factory_network_config)
-  @combinations.generate(
-      combinations.combine(
-          model_id=['MobileNetV1', 'MobileNetV2',
-                    'MobileNetV3Large', 'MobileNetV3Small',
-                    'MobileNetV3EdgeTPU'],
-          filter_size_scale=[1.0, 0.75],
-      ))
-  def test_mobilenet_creation(self, model_id, filter_size_scale):
-    """Test creation of Mobilenet models."""
-    network = backbones.MobileNet(
-        model_id=model_id,
-        filter_size_scale=filter_size_scale,
-        norm_momentum=0.99,
-        norm_epsilon=1e-5)
-    backbone_config = backbones_cfg.Backbone(
-        type='mobilenet',
-        mobilenet=backbones_cfg.MobileNet(
-            model_id=model_id, filter_size_scale=filter_size_scale))
-    norm_activation_config = common_cfg.NormActivation(
-        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
-    factory_network = factory.build_backbone(
-        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
-        backbone_config=backbone_config,
-        norm_activation_config=norm_activation_config)
-    network_config = network.get_config()
-    factory_network_config = factory_network.get_config()
-    self.assertEqual(network_config, factory_network_config)
-  @combinations.generate(combinations.combine(model_id=['49'],))
-  def test_spinenet_creation(self, model_id):
-    """Test creation of SpineNet models."""
-    # Unlike the other tests, a fixed spatial input size is used here.
-    input_size = 128
-    min_level = 3
-    max_level = 7
-    input_specs = tf.keras.layers.InputSpec(
-        shape=[None, input_size, input_size, 3])
-    network = backbones.SpineNet(
-        input_specs=input_specs,
-        min_level=min_level,
-        max_level=max_level,
-        norm_momentum=0.99,
-        norm_epsilon=1e-5)
-    backbone_config = backbones_cfg.Backbone(
-        type='spinenet',
-        spinenet=backbones_cfg.SpineNet(model_id=model_id))
-    norm_activation_config = common_cfg.NormActivation(
-        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
-    factory_network = factory.build_backbone(
-        input_specs=tf.keras.layers.InputSpec(
-            shape=[None, input_size, input_size, 3]),
-        backbone_config=backbone_config,
-        norm_activation_config=norm_activation_config)
-    network_config = network.get_config()
-    factory_network_config = factory_network.get_config()
-    self.assertEqual(network_config, factory_network_config)
-  @combinations.generate(
-      combinations.combine(model_id=[38, 56, 104],))
-  def test_revnet_creation(self, model_id):
-    """Test creation of RevNet models."""
-    network = backbones.RevNet(
-        model_id=model_id, norm_momentum=0.99, norm_epsilon=1e-5)
-    backbone_config = backbones_cfg.Backbone(
-        type='revnet',
-        revnet=backbones_cfg.RevNet(model_id=model_id))
-    norm_activation_config = common_cfg.NormActivation(
-        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
-    factory_network = factory.build_backbone(
-        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
-        backbone_config=backbone_config,
-        norm_activation_config=norm_activation_config)
-    network_config = network.get_config()
-    factory_network_config = factory_network.get_config()
-    self.assertEqual(network_config, factory_network_config)
-  @combinations.generate(combinations.combine(model_type=['resnet_3d'],))
-  def test_resnet_3d_creation(self, model_type):
-    """Test creation of ResNet 3D models."""
-    # This test only checks that construction from the default 3D backbone
-    # config succeeds; it does not go through `factory.build_backbone`.
-    backbone_cfg = backbones_3d_cfg.Backbone3D(type=model_type).get()
-    temporal_strides = []
-    temporal_kernel_sizes = []
-    for block_spec in backbone_cfg.block_specs:
-      temporal_strides.append(block_spec.temporal_strides)
-      temporal_kernel_sizes.append(block_spec.temporal_kernel_sizes)
-    _ = backbones.ResNet3D(
-        model_id=backbone_cfg.model_id,
-        temporal_strides=temporal_strides,
-        temporal_kernel_sizes=temporal_kernel_sizes,
-        norm_momentum=0.99,
-        norm_epsilon=1e-5)
-  @combinations.generate(
-      combinations.combine(
-          model_id=[
-              'MobileDetCPU',
-              'MobileDetDSP',
-              'MobileDetEdgeTPU',
-              'MobileDetGPU'],
-          filter_size_scale=[1.0, 0.75],
-      ))
-  def test_mobiledet_creation(self, model_id, filter_size_scale):
-    """Test creation of Mobiledet models."""
-    network = backbones.MobileDet(
-        model_id=model_id,
-        filter_size_scale=filter_size_scale,
-        norm_momentum=0.99,
-        norm_epsilon=1e-5)
-    backbone_config = backbones_cfg.Backbone(
-        type='mobiledet',
-        mobiledet=backbones_cfg.MobileDet(
-            model_id=model_id, filter_size_scale=filter_size_scale))
-    norm_activation_config = common_cfg.NormActivation(
-        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
-    factory_network = factory.build_backbone(
-        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
-        backbone_config=backbone_config,
-        norm_activation_config=norm_activation_config)
-    network_config = network.get_config()
-    factory_network_config = factory_network.get_config()
-    self.assertEqual(network_config, factory_network_config)
-# Run the test suite when this file is executed as a script.
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/mobiledet.py
+++ b/official/vision/modeling/backbones/mobiledet.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Definitions of MobileDet Networks."""
-import dataclasses
-from typing import Any, Dict, Optional, Tuple, List
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.backbones import mobilenet
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-layers = tf.keras.layers
-#  pylint: disable=pointless-string-statement
-"""
-Architecture: https://arxiv.org/abs/2004.14525.
-"MobileDets: Searching for Object Detection Architectures for
-Mobile Accelerators" Yunyang Xiong, Hanxiao Liu, Suyog Gupta, Berkin Akin,
-Gabriel Bender, Yongzhe Wang, Pieter-Jan Kindermans, Mingxing Tan, Vikas Singh,
-Bo Chen
-Note that the `round_down_protect` flag should be set to False when scaling
-the network.
-"""
-MD_CPU_BLOCK_SPECS = {
-    'spec_name': 'MobileDetCPU',
-    # [expand_ratio] is set to 1 and [use_residual] is set to false
-    # for inverted_bottleneck_no_expansion
-    # [se_ratio] is set to 0.25 for all inverted_bottleneck layers
-    # [activation] is set to 'hard_swish' for all applicable layers
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'use_residual', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 16, 'hard_swish', None, None, None, False),
-        # inverted_bottleneck_no_expansion
-        ('invertedbottleneck', 3, 1, 8, 'hard_swish', 0.25, 1., False, True),
-        ('invertedbottleneck', 3, 2, 16, 'hard_swish', 0.25, 4., False, True),
-        ('invertedbottleneck', 3, 2, 32, 'hard_swish', 0.25, 8., False, False),
-        ('invertedbottleneck', 3, 1, 32, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 3, 1, 32, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 3, 1, 32, 'hard_swish', 0.25, 4., True, True),
-        ('invertedbottleneck', 5, 2, 72, 'hard_swish', 0.25, 8., False, False),
-        ('invertedbottleneck', 3, 1, 72, 'hard_swish', 0.25, 8., True, False),
-        ('invertedbottleneck', 5, 1, 72, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 3, 1, 72, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 3, 1, 72, 'hard_swish', 0.25, 8., False, False),
-        ('invertedbottleneck', 3, 1, 72, 'hard_swish', 0.25, 8., True, False),
-        ('invertedbottleneck', 3, 1, 72, 'hard_swish', 0.25, 8., True, False),
-        ('invertedbottleneck', 3, 1, 72, 'hard_swish', 0.25, 8., True, True),
-        ('invertedbottleneck', 5, 2, 104, 'hard_swish', 0.25, 8., False, False),
-        ('invertedbottleneck', 5, 1, 104, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 5, 1, 104, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 3, 1, 104, 'hard_swish', 0.25, 4., True, False),
-        ('invertedbottleneck', 3, 1, 144, 'hard_swish', 0.25, 8., False, True),
-    ]
-}
-MD_DSP_BLOCK_SPECS = {
-    'spec_name': 'MobileDetDSP',
-    # [expand_ratio] is set to 1 and [use_residual] is set to false
-    # for inverted_bottleneck_no_expansion
-    # [use_depthwise] is set to False for fused_conv
-    # [se_ratio] is set to None for all inverted_bottleneck layers
-    # [activation] is set to 'relu6' for all applicable layers
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'input_compression_ratio', 'output_compression_ratio',
-                          'use_depthwise', 'use_residual', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu6',
-         None, None, None, None, None, None, False),
-        # inverted_bottleneck_no_expansion
-        ('invertedbottleneck', 3, 1, 24, 'relu6',
-         None, 1., None, None, True, False, True),
-        ('invertedbottleneck', 3, 2, 32, 'relu6',
-         None, 4., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 32, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 32, 'relu6',
-         None, 4., None, None, True, True, False),
-        ('tucker', 3, 1, 32, 'relu6',
-         None, None, 0.25, 0.75, None, True, True),
-        ('invertedbottleneck', 3, 2, 64, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 64, 'relu6',
-         None, 4., None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 64, 'relu6',
-         None, 4., None, None, False, True, True),  # fused_conv
-        ('invertedbottleneck', 3, 2, 120, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 120, 'relu6',
-         None, 4., None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 120, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 120, 'relu6',
-         None, 8., None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 144, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 144, 'relu6',
-         None, 8., None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 144, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 144, 'relu6',
-         None, 8., None, None, True, True, True),
-        ('invertedbottleneck', 3, 2, 160, 'relu6',
-         None, 4, None, None, True, False, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu6',
-         None, 4, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu6',
-         None, 4., None, None, False, False, False),  # fused_conv
-        ('tucker', 3, 1, 160, 'relu6',
-         None, None, 0.75, 0.75, None, True, False),
-        ('invertedbottleneck', 3, 1, 240, 'relu6',
-         None, 8, None, None, True, False, True),
-    ]
-}
-MD_EdgeTPU_BLOCK_SPECS = {
-    'spec_name': 'MobileDetEdgeTPU',
-    # [use_depthwise] is set to False for fused_conv
-    # [se_ratio] is set to None for all inverted_bottleneck layers
-    # [activation] is set to 'relu6' for all applicable layers
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'input_compression_ratio', 'output_compression_ratio',
-                          'use_depthwise', 'use_residual', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu6',
-         None, None, None, None, None, None, False),
-        ('tucker', 3, 1, 16, 'relu6',
-         None, None, 0.25, 0.75, None, False, True),
-        ('invertedbottleneck', 3, 2, 16, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 16, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 16, 'relu6',
-         None, 8., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 16, 'relu6',
-         None, 4., None, None, False, True, True),  # fused_conv
-        ('invertedbottleneck', 5, 2, 40, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 40, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 40, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 40, 'relu6',
-         None, 4., None, None, False, True, True),  # fused_conv
-        ('invertedbottleneck', 3, 2, 72, 'relu6',
-         None, 8, None, None, True, False, False),
-        ('invertedbottleneck', 3, 1, 72, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 72, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 72, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 5, 1, 96, 'relu6',
-         None, 8, None, None, True, False, False),
-        ('invertedbottleneck', 5, 1, 96, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu6',
-         None, 8, None, None, True, True, True),
-        ('invertedbottleneck', 5, 2, 120, 'relu6',
-         None, 8, None, None, True, False, False),
-        ('invertedbottleneck', 3, 1, 120, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 5, 1, 120, 'relu6',
-         None, 4, None, None, True, True, False),
-        ('invertedbottleneck', 3, 1, 120, 'relu6',
-         None, 8, None, None, True, True, False),
-        ('invertedbottleneck', 5, 1, 384, 'relu6',
-         None, 8, None, None, True, False, True),
-    ]
-}
-MD_GPU_BLOCK_SPECS = {
-    'spec_name': 'MobileDetGPU',
-    # [use_depthwise] is set to False for fused_conv
-    # [se_ratio] is set to None for all inverted_bottleneck layers
-    # [activation] is set to 'relu6' for all applicable layers
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'input_compression_ratio', 'output_compression_ratio',
-                          'use_depthwise', 'use_residual', 'is_output'],
-    'block_specs': [
-        # block 0
-        ('convbn', 3, 2, 32, 'relu6',
-         None, None, None, None, None, None, False),
-        # block 1
-        ('tucker', 3, 1, 16, 'relu6',
-         None, None, 0.25, 0.25, None, False, True),
-        # block 2
-        ('invertedbottleneck', 3, 2, 32, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('tucker', 3, 1, 32, 'relu6',
-         None, None, 0.25, 0.25, None, True, False),
-        ('tucker', 3, 1, 32, 'relu6',
-         None, None, 0.25, 0.25, None, True, False),
-        ('tucker', 3, 1, 32, 'relu6',
-         None, None, 0.25, 0.25, None, True, True),
-        # block 3
-        ('invertedbottleneck', 3, 2, 64, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 64, 'relu6',
-         None, 8., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 64, 'relu6',
-         None, 8., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 64, 'relu6',
-         None, 4., None, None, False, True, True),  # fused_conv
-        # block 4
-        ('invertedbottleneck', 3, 2, 128, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        # block 5
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 8., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 8., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 8., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 8., None, None, False, True, True),  # fused_conv
-        # block 6
-        ('invertedbottleneck', 3, 2, 128, 'relu6',
-         None, 4., None, None, False, False, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        ('invertedbottleneck', 3, 1, 128, 'relu6',
-         None, 4., None, None, False, True, False),  # fused_conv
-        # block 7
-        ('invertedbottleneck', 3, 1, 384, 'relu6',
-         None, 8, None, None, True, False, True),
-    ]
-}
-SUPPORTED_SPECS_MAP = {
-    'MobileDetCPU': MD_CPU_BLOCK_SPECS,
-    'MobileDetDSP': MD_DSP_BLOCK_SPECS,
-    'MobileDetEdgeTPU': MD_EdgeTPU_BLOCK_SPECS,
-    'MobileDetGPU': MD_GPU_BLOCK_SPECS,
-}
-@dataclasses.dataclass
-class BlockSpec(hyperparams.Config):
-  """A container class that specifies the block configuration for MobileDet."""
-  block_fn: str = 'convbn'
-  kernel_size: int = 3
-  strides: int = 1
-  filters: int = 32
-  use_bias: bool = False
-  use_normalization: bool = True
-  activation: str = 'relu6'
-  is_output: bool = True
-  # Used for block types InvertedBottleneckBlock and TuckerConvBlock.
-  use_residual: bool = True
-  # Used for block type InvertedBottleneckBlock only.
-  use_depthwise: bool = True
-  expand_ratio: Optional[float] = 8.
-  se_ratio: Optional[float] = None
-  # Used for block type TuckerConvBlock only.
-  input_compression_ratio: Optional[float] = None
-  output_compression_ratio: Optional[float] = None
-def block_spec_decoder(
-    specs: Dict[Any, Any],
-    filter_size_scale: float,
-    divisible_by: int = 8) -> List[BlockSpec]:
-  """Decodes specs for a block.
-  Args:
-    specs: A `dict` specification of block specs of a mobiledet version.
-    filter_size_scale: A `float` multiplier for the filter size for all
-      convolution ops. The value must be greater than zero. Typical usage will
-      be to set this value in (0, 1) to reduce the number of parameters or
-      computation cost of the model.
-    divisible_by: An `int` that ensures all inner dimensions are divisible by
-      this number.
-  Returns:
-    A list of `BlockSpec` that defines structure of the base network.
-  """
-  spec_name = specs['spec_name']
-  block_spec_schema = specs['block_spec_schema']
-  block_specs = specs['block_specs']
-  if not block_specs:
-    raise ValueError(
-        'The block spec cannot be empty for {} !'.format(spec_name))
-  if len(block_specs[0]) != len(block_spec_schema):
-    raise ValueError('The block spec values {} do not match with '
-                     'the schema {}'.format(block_specs[0], block_spec_schema))
-  decoded_specs = []
-  for s in block_specs:
-    kw_s = dict(zip(block_spec_schema, s))
-    decoded_specs.append(BlockSpec(**kw_s))
-  for ds in decoded_specs:
-    if ds.filters:
-      ds.filters = nn_layers.round_filters(filters=ds.filters,
-                                           multiplier=filter_size_scale,
-                                           divisor=divisible_by,
-                                           round_down_protect=False,
-                                           min_depth=8)
-  return decoded_specs
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class MobileDet(tf.keras.Model):
-  """Creates a MobileDet family model."""
-  def __init__(
-      self,
-      model_id: str = 'MobileDetCPU',
-      filter_size_scale: float = 1.0,
-      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
-          shape=[None, None, None, 3]),
-      # The following are for hyper-parameter tuning.
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      # The following should be kept the same most of the time.
-      min_depth: int = 8,
-      divisible_by: int = 8,
-      regularize_depthwise: bool = False,
-      use_sync_bn: bool = False,
-      **kwargs):
-    """Initializes a MobileDet model.
-    Args:
-      model_id: A `str` of MobileDet version. The supported values are
-        `MobileDetCPU`, `MobileDetDSP`, `MobileDetEdgeTPU`, `MobileDetGPU`.
-      filter_size_scale: A `float` of multiplier for the filters (number of
-        channels) for all convolution ops. The value must be greater than zero.
-        Typical usage will be to set this value in (0, 1) to reduce the number
-        of parameters or computation cost of the model.
-      input_specs: A `tf.keras.layers.InputSpec` of specs of the input tensor.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A `str` for kernel initializer of convolutional
-        layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      min_depth: An `int` of minimum depth (number of channels) for all
-        convolution ops. Enforced when filter_size_scale < 1, and not an active
-        constraint when filter_size_scale >= 1.
-      divisible_by: An `int` that ensures all inner dimensions are divisible by
-        this number.
-      regularize_depthwise: If True, apply regularization on depthwise.
-      use_sync_bn: If True, use synchronized batch normalization.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    if model_id not in SUPPORTED_SPECS_MAP:
-      raise ValueError('The MobileDet version {} '
-                       'is not supported'.format(model_id))
-    if filter_size_scale <= 0:
-      raise ValueError('filter_size_scale is not greater than zero.')
-    self._model_id = model_id
-    self._input_specs = input_specs
-    self._filter_size_scale = filter_size_scale
-    self._min_depth = min_depth
-    self._divisible_by = divisible_by
-    self._regularize_depthwise = regularize_depthwise
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    block_specs = SUPPORTED_SPECS_MAP.get(model_id)
-    self._decoded_specs = block_spec_decoder(
-        specs=block_specs,
-        filter_size_scale=self._filter_size_scale,
-        divisible_by=self._get_divisible_by())
-    x, endpoints, next_endpoint_level = self._mobiledet_base(inputs=inputs)
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    super(MobileDet, self).__init__(
-        inputs=inputs, outputs=endpoints, **kwargs)
-  def _get_divisible_by(self):
-    return self._divisible_by
-  def _mobiledet_base(self,
-                      inputs: tf.Tensor
-                      ) -> Tuple[tf.Tensor, Dict[str, tf.Tensor], int]:
-    """Builds the base MobileDet architecture.
-    Args:
-      inputs: A `tf.Tensor` of shape `[batch_size, height, width, channels]`.
-    Returns:
-      A tuple of the output Tensor, a dictionary that collects endpoints, and
-      the next endpoint level.
-    """
-    input_shape = inputs.get_shape().as_list()
-    if len(input_shape) != 4:
-      raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))
-    net = inputs
-    endpoints = {}
-    endpoint_level = 1
-    for i, block_def in enumerate(self._decoded_specs):
-      block_name = 'block_group_{}_{}'.format(block_def.block_fn, i)
-      if block_def.block_fn == 'convbn':
-        net = mobilenet.Conv2DBNBlock(
-            filters=block_def.filters,
-            kernel_size=block_def.kernel_size,
-            strides=block_def.strides,
-            activation=block_def.activation,
-            use_bias=block_def.use_bias,
-            use_normalization=block_def.use_normalization,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            bias_regularizer=self._bias_regularizer,
-            use_sync_bn=self._use_sync_bn,
-            norm_momentum=self._norm_momentum,
-            norm_epsilon=self._norm_epsilon
-        )(net)
-      elif block_def.block_fn == 'invertedbottleneck':
-        in_filters = net.shape.as_list()[-1]
-        net = nn_blocks.InvertedBottleneckBlock(
-            in_filters=in_filters,
-            out_filters=block_def.filters,
-            kernel_size=block_def.kernel_size,
-            strides=block_def.strides,
-            expand_ratio=block_def.expand_ratio,
-            se_ratio=block_def.se_ratio,
-            se_inner_activation=block_def.activation,
-            se_gating_activation='sigmoid',
-            se_round_down_protect=False,
-            expand_se_in_filters=True,
-            activation=block_def.activation,
-            use_depthwise=block_def.use_depthwise,
-            use_residual=block_def.use_residual,
-            regularize_depthwise=self._regularize_depthwise,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            bias_regularizer=self._bias_regularizer,
-            use_sync_bn=self._use_sync_bn,
-            norm_momentum=self._norm_momentum,
-            norm_epsilon=self._norm_epsilon,
-            divisible_by=self._get_divisible_by()
-        )(net)
-      elif block_def.block_fn == 'tucker':
-        in_filters = net.shape.as_list()[-1]
-        net = nn_blocks.TuckerConvBlock(
-            in_filters=in_filters,
-            out_filters=block_def.filters,
-            kernel_size=block_def.kernel_size,
-            strides=block_def.strides,
-            input_compression_ratio=block_def.input_compression_ratio,
-            output_compression_ratio=block_def.output_compression_ratio,
-            activation=block_def.activation,
-            use_residual=block_def.use_residual,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            bias_regularizer=self._bias_regularizer,
-            use_sync_bn=self._use_sync_bn,
-            norm_momentum=self._norm_momentum,
-            norm_epsilon=self._norm_epsilon,
-            divisible_by=self._get_divisible_by()
-        )(net)
-      else:
-        raise ValueError('Unknown block type {} for layer {}'.format(
-            block_def.block_fn, i))
-      net = tf.keras.layers.Activation('linear', name=block_name)(net)
-      if block_def.is_output:
-        endpoints[str(endpoint_level)] = net
-        endpoint_level += 1
-    return net, endpoints, endpoint_level
-  def get_config(self):
-    config_dict = {
-        'model_id': self._model_id,
-        'filter_size_scale': self._filter_size_scale,
-        'min_depth': self._min_depth,
-        'divisible_by': self._divisible_by,
-        'regularize_depthwise': self._regularize_depthwise,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('mobiledet')
-def build_mobiledet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
-) -> tf.keras.Model:
-  """Builds MobileDet backbone from a config."""
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'mobiledet', (f'Inconsistent backbone type '
-                                        f'{backbone_type}')
-  return MobileDet(
-      model_id=backbone_cfg.model_id,
-      filter_size_scale=backbone_cfg.filter_size_scale,
-      input_specs=input_specs,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
--- a/official/vision/modeling/backbones/mobiledet_test.py
+++ b/official/vision/modeling/backbones/mobiledet_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Tests for Mobiledet."""
-import itertools
-from absl.testing import parameterized
-import tensorflow as tf
-from official.vision.modeling.backbones import mobiledet
-class MobileDetTest(parameterized.TestCase, tf.test.TestCase):
-  @parameterized.parameters(
-      'MobileDetCPU',
-      'MobileDetDSP',
-      'MobileDetEdgeTPU',
-      'MobileDetGPU',
-  )
-  def test_serialize_deserialize(self, model_id):
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id=model_id,
-        filter_size_scale=1.0,
-        use_sync_bn=False,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-        bias_regularizer=None,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        min_depth=8,
-        divisible_by=8,
-        regularize_depthwise=False,
-    )
-    network = mobiledet.MobileDet(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = mobiledet.MobileDet.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-  @parameterized.parameters(
-      itertools.product(
-          [1, 3],
-          [
-              'MobileDetCPU',
-              'MobileDetDSP',
-              'MobileDetEdgeTPU',
-              'MobileDetGPU',
-          ],
-      ))
-  def test_input_specs(self, input_dim, model_id):
-    """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
-    network = mobiledet.MobileDet(model_id=model_id, input_specs=input_specs)
-    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
-    _ = network(inputs)
-  @parameterized.parameters(
-      itertools.product(
-          [
-              'MobileDetCPU',
-              'MobileDetDSP',
-              'MobileDetEdgeTPU',
-              'MobileDetGPU',
-          ],
-          [32, 224],
-      ))
-  def test_mobiledet_creation(self, model_id, input_size):
-    """Test creation of MobileDet family models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    mobiledet_layers = {
-        # The number of filters of the layers whose outputs are collected,
-        # for filter_size_scale = 1.0.
-        'MobileDetCPU': [8, 16, 32, 72, 144],
-        'MobileDetDSP': [24, 32, 64, 144, 240],
-        'MobileDetEdgeTPU': [16, 16, 40, 96, 384],
-        'MobileDetGPU': [16, 32, 64, 128, 384],
-    }
-    network = mobiledet.MobileDet(model_id=model_id,
-                                  filter_size_scale=1.0)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    for idx, num_filter in enumerate(mobiledet_layers[model_id]):
-      self.assertAllEqual(
-          [1, input_size / 2 ** (idx+1), input_size / 2 ** (idx+1), num_filter],
-          endpoints[str(idx+1)].shape.as_list())
--- a/official/vision/modeling/backbones/mobilenet.py
+++ b/official/vision/modeling/backbones/mobilenet.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Contains definitions of MobileNet Networks."""
-import dataclasses
-from typing import Optional, Dict, Any, Tuple
-# Import libraries
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-layers = tf.keras.layers
-#  pylint: disable=pointless-string-statement
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class Conv2DBNBlock(tf.keras.layers.Layer):
-  """A convolution block with batch normalization."""
-  def __init__(
-      self,
-      filters: int,
-      kernel_size: int = 3,
-      strides: int = 1,
-      use_bias: bool = False,
-      use_explicit_padding: bool = False,
-      activation: str = 'relu6',
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      use_normalization: bool = True,
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      **kwargs):
-    """A convolution block with batch normalization.
-    Args:
-      filters: An `int` number of filters for the first two convolutions. Note
-        that the third and final convolution will use 4 times as many filters.
-      kernel_size: An `int` specifying the height and width of the 2D
-        convolution window.
-      strides: An `int` of block stride. If greater than 1, this block will
-        ultimately downsample the input.
-      use_bias: If True, use bias in the convolution layer.
-      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
-        inputs so that the output dimensions are the same as if 'SAME' padding
-        were used.
-      activation: A `str` name of the activation function.
-      kernel_initializer: A `str` for kernel initializer of convolutional
-        layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      use_normalization: If True, use batch normalization.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(Conv2DBNBlock, self).__init__(**kwargs)
-    self._filters = filters
-    self._kernel_size = kernel_size
-    self._strides = strides
-    self._activation = activation
-    self._use_bias = use_bias
-    self._use_explicit_padding = use_explicit_padding
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._use_normalization = use_normalization
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    if use_explicit_padding and kernel_size > 1:
-      self._padding = 'valid'
-    else:
-      self._padding = 'same'
-    if use_sync_bn:
-      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = tf.keras.layers.BatchNormalization
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-  def get_config(self):
-    config = {
-        'filters': self._filters,
-        'strides': self._strides,
-        'kernel_size': self._kernel_size,
-        'use_bias': self._use_bias,
-        'use_explicit_padding': self._use_explicit_padding,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'use_normalization': self._use_normalization,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon
-    }
-    base_config = super(Conv2DBNBlock, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-  def build(self, input_shape):
-    if self._use_explicit_padding and self._kernel_size > 1:
-      padding_size = nn_layers.get_padding_for_kernel_size(self._kernel_size)
-      self._pad = tf.keras.layers.ZeroPadding2D(padding_size)
-    self._conv0 = tf.keras.layers.Conv2D(
-        filters=self._filters,
-        kernel_size=self._kernel_size,
-        strides=self._strides,
-        padding=self._padding,
-        use_bias=self._use_bias,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)
-    if self._use_normalization:
-      self._norm0 = self._norm(
-          axis=self._bn_axis,
-          momentum=self._norm_momentum,
-          epsilon=self._norm_epsilon)
-    self._activation_layer = tf_utils.get_activation(
-        self._activation, use_keras_layer=True)
-    super(Conv2DBNBlock, self).build(input_shape)
-  def call(self, inputs, training=None):
-    if self._use_explicit_padding and self._kernel_size > 1:
-      inputs = self._pad(inputs)
-    x = self._conv0(inputs)
-    if self._use_normalization:
-      x = self._norm0(x)
-    return self._activation_layer(x)
-"""
-Architecture: https://arxiv.org/abs/1704.04861.
-"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision
-Applications" Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko,
-Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam
-"""
-MNV1_BLOCK_SPECS = {
-    'spec_name': 'MobileNetV1',
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides',
-                          'filters', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 32, False),
-        ('depsepconv', 3, 1, 64, False),
-        ('depsepconv', 3, 2, 128, False),
-        ('depsepconv', 3, 1, 128, True),
-        ('depsepconv', 3, 2, 256, False),
-        ('depsepconv', 3, 1, 256, True),
-        ('depsepconv', 3, 2, 512, False),
-        ('depsepconv', 3, 1, 512, False),
-        ('depsepconv', 3, 1, 512, False),
-        ('depsepconv', 3, 1, 512, False),
-        ('depsepconv', 3, 1, 512, False),
-        ('depsepconv', 3, 1, 512, True),
-        ('depsepconv', 3, 2, 1024, False),
-        ('depsepconv', 3, 1, 1024, True),
-    ]
-}
-"""
-Architecture: https://arxiv.org/abs/1801.04381
-"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
-Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
-"""
-MNV2_BLOCK_SPECS = {
-    'spec_name': 'MobileNetV2',
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'expand_ratio', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 32, None, False),
-        ('invertedbottleneck', 3, 1, 16, 1., False),
-        ('invertedbottleneck', 3, 2, 24, 6., False),
-        ('invertedbottleneck', 3, 1, 24, 6., True),
-        ('invertedbottleneck', 3, 2, 32, 6., False),
-        ('invertedbottleneck', 3, 1, 32, 6., False),
-        ('invertedbottleneck', 3, 1, 32, 6., True),
-        ('invertedbottleneck', 3, 2, 64, 6., False),
-        ('invertedbottleneck', 3, 1, 64, 6., False),
-        ('invertedbottleneck', 3, 1, 64, 6., False),
-        ('invertedbottleneck', 3, 1, 64, 6., False),
-        ('invertedbottleneck', 3, 1, 96, 6., False),
-        ('invertedbottleneck', 3, 1, 96, 6., False),
-        ('invertedbottleneck', 3, 1, 96, 6., True),
-        ('invertedbottleneck', 3, 2, 160, 6., False),
-        ('invertedbottleneck', 3, 1, 160, 6., False),
-        ('invertedbottleneck', 3, 1, 160, 6., False),
-        ('invertedbottleneck', 3, 1, 320, 6., True),
-        ('convbn', 1, 1, 1280, None, False),
-    ]
-}
-"""
-Architecture: https://arxiv.org/abs/1905.02244
-"Searching for MobileNetV3"
-Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan,
-Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam
-"""
-MNV3Large_BLOCK_SPECS = {
-    'spec_name': 'MobileNetV3Large',
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'use_normalization', 'use_bias', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 16,
-         'hard_swish', None, None, True, False, False),
-        ('invertedbottleneck', 3, 1, 16,
-         'relu', None, 1., None, False, False),
-        ('invertedbottleneck', 3, 2, 24,
-         'relu', None, 4., None, False, False),
-        ('invertedbottleneck', 3, 1, 24,
-         'relu', None, 3., None, False, True),
-        ('invertedbottleneck', 5, 2, 40,
-         'relu', 0.25, 3., None, False, False),
-        ('invertedbottleneck', 5, 1, 40,
-         'relu', 0.25, 3., None, False, False),
-        ('invertedbottleneck', 5, 1, 40,
-         'relu', 0.25, 3., None, False, True),
-        ('invertedbottleneck', 3, 2, 80,
-         'hard_swish', None, 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 80,
-         'hard_swish', None, 2.5, None, False, False),
-        ('invertedbottleneck', 3, 1, 80,
-         'hard_swish', None, 2.3, None, False, False),
-        ('invertedbottleneck', 3, 1, 80,
-         'hard_swish', None, 2.3, None, False, False),
-        ('invertedbottleneck', 3, 1, 112,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 112,
-         'hard_swish', 0.25, 6., None, False, True),
-        ('invertedbottleneck', 5, 2, 160,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 160,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 160,
-         'hard_swish', 0.25, 6., None, False, True),
-        ('convbn', 1, 1, 960,
-         'hard_swish', None, None, True, False, False),
-        ('gpooling', None, None, None,
-         None, None, None, None, None, False),
-        ('convbn', 1, 1, 1280,
-         'hard_swish', None, None, False, True, False),
-    ]
-}
-MNV3Small_BLOCK_SPECS = {
-    'spec_name': 'MobileNetV3Small',
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'use_normalization', 'use_bias', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 16,
-         'hard_swish', None, None, True, False, False),
-        ('invertedbottleneck', 3, 2, 16,
-         'relu', 0.25, 1, None, False, True),
-        ('invertedbottleneck', 3, 2, 24,
-         'relu', None, 72. / 16, None, False, False),
-        ('invertedbottleneck', 3, 1, 24,
-         'relu', None, 88. / 24, None, False, True),
-        ('invertedbottleneck', 5, 2, 40,
-         'hard_swish', 0.25, 4., None, False, False),
-        ('invertedbottleneck', 5, 1, 40,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 40,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 48,
-         'hard_swish', 0.25, 3., None, False, False),
-        ('invertedbottleneck', 5, 1, 48,
-         'hard_swish', 0.25, 3., None, False, True),
-        ('invertedbottleneck', 5, 2, 96,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 96,
-         'hard_swish', 0.25, 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 96,
-         'hard_swish', 0.25, 6., None, False, True),
-        ('convbn', 1, 1, 576,
-         'hard_swish', None, None, True, False, False),
-        ('gpooling', None, None, None,
-         None, None, None, None, None, False),
-        ('convbn', 1, 1, 1024,
-         'hard_swish', None, None, False, True, False),
-    ]
-}
-"""
-The EdgeTPU version is taken from
-github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v3.py
-"""
-MNV3EdgeTPU_BLOCK_SPECS = {
-    'spec_name': 'MobileNetV3EdgeTPU',
-    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
-                          'activation', 'se_ratio', 'expand_ratio',
-                          'use_residual', 'use_depthwise', 'is_output'],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu', None, None, None, None, False),
-        ('invertedbottleneck', 3, 1, 16, 'relu', None, 1., True, False, False),
-        ('invertedbottleneck', 3, 2, 32, 'relu', None, 8., True, False, False),
-        ('invertedbottleneck', 3, 1, 32, 'relu', None, 4., True, False, False),
-        ('invertedbottleneck', 3, 1, 32, 'relu', None, 4., True, False, False),
-        ('invertedbottleneck', 3, 1, 32, 'relu', None, 4., True, False, True),
-        ('invertedbottleneck', 3, 2, 48, 'relu', None, 8., True, False, False),
-        ('invertedbottleneck', 3, 1, 48, 'relu', None, 4., True, False, False),
-        ('invertedbottleneck', 3, 1, 48, 'relu', None, 4., True, False, False),
-        ('invertedbottleneck', 3, 1, 48, 'relu', None, 4., True, False, True),
-        ('invertedbottleneck', 3, 2, 96, 'relu', None, 8., True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 8., False, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, True),
-        ('invertedbottleneck', 5, 2, 160, 'relu', None, 8., True, True, False),
-        ('invertedbottleneck', 5, 1, 160, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 5, 1, 160, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 5, 1, 160, 'relu', None, 4., True, True, False),
-        ('invertedbottleneck', 3, 1, 192, 'relu', None, 8., True, True, True),
-        ('convbn', 1, 1, 1280, 'relu', None, None, None, None, False),
-    ]
-}
-"""
-Architecture: https://arxiv.org/pdf/2008.08178.pdf
-"Discovering Multi-Hardware Mobile Models via Architecture Search"
-Grace Chu, Okan Arikan, Gabriel Bender, Weijun Wang,
-Achille Brighton, Pieter-Jan Kindermans, Hanxiao Liu,
-Berkin Akin, Suyog Gupta, and Andrew Howard
-"""
-MNMultiMAX_BLOCK_SPECS = {
-    'spec_name': 'MobileNetMultiMAX',
-    'block_spec_schema': [
-        'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
-        'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
-    ],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu', None, True, False, False),
-        ('invertedbottleneck', 3, 2, 32, 'relu', 3., None, False, True),
-        ('invertedbottleneck', 5, 2, 64, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 2., None, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 2., None, False, True),
-        ('invertedbottleneck', 5, 2, 128, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 4., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, True),
-        ('invertedbottleneck', 3, 2, 160, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 160, 'relu', 4., None, False, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu', 5., None, False, False),
-        ('invertedbottleneck', 5, 1, 160, 'relu', 4., None, False, True),
-        ('convbn', 1, 1, 960, 'relu', None, True, False, False),
-        ('gpooling', None, None, None, None, None, None, None, False),
-        # Remove bias and add batch norm for the last layer to support QAT
-        # and achieve slightly better accuracy.
-        ('convbn', 1, 1, 1280, 'relu', None, True, False, False),
-    ]
-}
-MNMultiAVG_BLOCK_SPECS = {
-    'spec_name': 'MobileNetMultiAVG',
-    'block_spec_schema': [
-        'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
-        'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
-    ],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu', None, True, False, False),
-        ('invertedbottleneck', 3, 2, 32, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 32, 'relu', 2., None, False, True),
-        ('invertedbottleneck', 5, 2, 64, 'relu', 5., None, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 2., None, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 3., None, False, True),
-        ('invertedbottleneck', 5, 2, 128, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu', 4., None, False, True),
-        ('invertedbottleneck', 3, 2, 192, 'relu', 6., None, False, False),
-        ('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, False),
-        ('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, False),
-        ('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, True),
-        ('convbn', 1, 1, 960, 'relu', None, True, False, False),
-        ('gpooling', None, None, None, None, None, None, None, False),
-        # Remove bias and add batch norm for the last layer to support QAT
-        # and achieve slightly better accuracy.
-        ('convbn', 1, 1, 1280, 'relu', None, True, False, False),
-    ]
-}
-# Similar to MobileNetMultiAVG and used for segmentation task.
-# Reduced the filters by a factor of 2 in the last block.
-MNMultiAVG_SEG_BLOCK_SPECS = {
-    'spec_name':
-        'MobileNetMultiAVGSeg',
-    'block_spec_schema': [
-        'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
-        'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
-    ],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu', None, True, False, False),
-        ('invertedbottleneck', 3, 2, 32, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 32, 'relu', 2., True, False, True),
-        ('invertedbottleneck', 5, 2, 64, 'relu', 5., True, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 2., True, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 3., True, False, True),
-        ('invertedbottleneck', 5, 2, 128, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 3, 1, 160, 'relu', 4., True, False, True),
-        ('invertedbottleneck', 3, 2, 192, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 5, 1, 96, 'relu', 2., True, False, False),
-        ('invertedbottleneck', 5, 1, 96, 'relu', 4., True, False, False),
-        ('invertedbottleneck', 5, 1, 96, 'relu', 4., True, False, True),
-        ('convbn', 1, 1, 448, 'relu', None, True, False, True),
-        ('gpooling', None, None, None, None, None, None, None, False),
-        # Remove bias and add batch norm for the last layer to support QAT
-        # and achieve slightly better accuracy.
-        ('convbn', 1, 1, 1280, 'relu', None, True, False, False),
-    ]
-}
-# Similar to MobileNetMultiMax and used for segmentation task.
-# Reduced the filters by a factor of 2 in the last block.
-MNMultiMAX_SEG_BLOCK_SPECS = {
-    'spec_name':
-        'MobileNetMultiMAXSeg',
-    'block_spec_schema': [
-        'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
-        'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
-    ],
-    'block_specs': [
-        ('convbn', 3, 2, 32, 'relu', None, True, False, False),
-        ('invertedbottleneck', 3, 2, 32, 'relu', 3., True, False, True),
-        ('invertedbottleneck', 5, 2, 64, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 2., True, False, False),
-        ('invertedbottleneck', 3, 1, 64, 'relu', 2., True, False, True),
-        ('invertedbottleneck', 5, 2, 128, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 4., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 3, 1, 128, 'relu', 3., True, False, True),
-        ('invertedbottleneck', 3, 2, 160, 'relu', 6., True, False, False),
-        ('invertedbottleneck', 5, 1, 96, 'relu', 2., True, False, False),
-        ('invertedbottleneck', 3, 1, 96, 'relu', 4., True, False, False),
-        ('invertedbottleneck', 5, 1, 96, 'relu', 320.0 / 96, True, False, True),
-        ('convbn', 1, 1, 448, 'relu', None, True, False, True),
-        ('gpooling', None, None, None, None, None, None, None, False),
-        # Remove bias and add batch norm for the last layer to support QAT
-        # and achieve slightly better accuracy.
-        ('convbn', 1, 1, 1280, 'relu', None, True, False, False),
-    ]
-}
-# A smaller MNV3Small, with reduced filters for the last few layers
-MNV3SmallReducedFilters = {
-    'spec_name':
-        'MobilenetV3SmallReducedFilters',
-    'block_spec_schema': [
-        'block_fn', 'kernel_size', 'strides', 'filters', 'activation',
-        'se_ratio', 'expand_ratio', 'use_normalization', 'use_bias', 'is_output'
-    ],
-    'block_specs': [
-        ('convbn', 3, 2, 16, 'hard_swish', None, None, True, False, False),
-        ('invertedbottleneck', 3, 2, 16, 'relu', 0.25, 1, None, False, True),
-        ('invertedbottleneck', 3, 2, 24, 'relu', None, 72. / 16, None, False,
-         False),
-        ('invertedbottleneck', 3, 1, 24, 'relu', None, 88. / 24, None, False,
-         True),
-        ('invertedbottleneck', 5, 2, 40, 'hard_swish', 0.25, 4, None, False,
-         False),
-        ('invertedbottleneck', 5, 1, 40, 'hard_swish', 0.25, 6, None, False,
-         False),
-        ('invertedbottleneck', 5, 1, 40, 'hard_swish', 0.25, 6, None, False,
-         False),
-        ('invertedbottleneck', 5, 1, 48, 'hard_swish', 0.25, 3, None, False,
-         False),
-        ('invertedbottleneck', 5, 1, 48, 'hard_swish', 0.25, 3, None, False,
-         True),
-        # Layers below are different from MobileNetV3Small and have
-        # half as many filters
-        ('invertedbottleneck', 5, 2, 48, 'hard_swish', 0.25, 3, None, False,
-         False),
-        ('invertedbottleneck', 5, 1, 48, 'hard_swish', 0.25, 6, None, False,
-         False),
-        ('invertedbottleneck', 5, 1, 48, 'hard_swish', 0.25, 6, None, False,
-         True),
-        ('convbn', 1, 1, 288, 'hard_swish', None, None, True, False, False),
-        ('gpooling', None, None, None, None, None, None, None, None, False),
-        ('convbn', 1, 1, 1024, 'hard_swish', None, None, False, True, False),
-    ]
-}
-SUPPORTED_SPECS_MAP = {
-    'MobileNetV1': MNV1_BLOCK_SPECS,
-    'MobileNetV2': MNV2_BLOCK_SPECS,
-    'MobileNetV3Large': MNV3Large_BLOCK_SPECS,
-    'MobileNetV3Small': MNV3Small_BLOCK_SPECS,
-    'MobileNetV3EdgeTPU': MNV3EdgeTPU_BLOCK_SPECS,
-    'MobileNetMultiMAX': MNMultiMAX_BLOCK_SPECS,
-    'MobileNetMultiAVG': MNMultiAVG_BLOCK_SPECS,
-    'MobileNetMultiAVGSeg': MNMultiAVG_SEG_BLOCK_SPECS,
-    'MobileNetMultiMAXSeg': MNMultiMAX_SEG_BLOCK_SPECS,
-    'MobileNetV3SmallReducedFilters': MNV3SmallReducedFilters,
-}
-@dataclasses.dataclass
-class BlockSpec(hyperparams.Config):
-  """A container class that specifies the block configuration for MobileNet."""
-  block_fn: str = 'convbn'
-  kernel_size: int = 3
-  strides: int = 1
-  filters: int = 32
-  use_bias: bool = False
-  use_normalization: bool = True
-  activation: str = 'relu6'
-  # Used for block type InvertedResConv.
-  expand_ratio: Optional[float] = 6.
-  # Used for block type InvertedResConv with SE.
-  se_ratio: Optional[float] = None
-  use_depthwise: bool = True
-  use_residual: bool = True
-  is_output: bool = True
-def block_spec_decoder(
-    specs: Dict[Any, Any],
-    filter_size_scale: float,
-    # Set to 1 for mobilenetv1.
-    divisible_by: int = 8,
-    finegrain_classification_mode: bool = True):
-  """Decodes specs for a block.
-  Args:
-    specs: A `dict` specification of block specs of a mobilenet version.
-    filter_size_scale: A `float` multiplier for the filter size for all
-      convolution ops. The value must be greater than zero. Typical usage will
-      be to set this value in (0, 1) to reduce the number of parameters or
-      computation cost of the model.
-    divisible_by: An `int` that ensures all inner dimensions are divisible by
-      this number.
-    finegrain_classification_mode: If True, the model will keep the last layer
-      large even for small multipliers, following
-      https://arxiv.org/abs/1801.04381.
-  Returns:
-    A list of `BlockSpec` that defines structure of the base network.
-  """
-  spec_name = specs['spec_name']
-  block_spec_schema = specs['block_spec_schema']
-  block_specs = specs['block_specs']
-  if not block_specs:
-    raise ValueError(
-        'The block spec cannot be empty for {} !'.format(spec_name))
-  if len(block_specs[0]) != len(block_spec_schema):
-    raise ValueError('The block spec values {} do not match with '
-                     'the schema {}'.format(block_specs[0], block_spec_schema))
-  decoded_specs = []
-  for s in block_specs:
-    kw_s = dict(zip(block_spec_schema, s))
-    decoded_specs.append(BlockSpec(**kw_s))
-  # This adjustment applies to V2 and V3
-  if (spec_name != 'MobileNetV1'
-      and finegrain_classification_mode
-      and filter_size_scale < 1.0):
-    decoded_specs[-1].filters /= filter_size_scale  # pytype: disable=annotation-type-mismatch
-  for ds in decoded_specs:
-    if ds.filters:
-      ds.filters = nn_layers.round_filters(filters=ds.filters,
-                                           multiplier=filter_size_scale,
-                                           divisor=divisible_by,
-                                           min_depth=8)
-  return decoded_specs
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class MobileNet(tf.keras.Model):
-  """Creates a MobileNet family model."""
-  def __init__(
-      self,
-      model_id: str = 'MobileNetV2',
-      filter_size_scale: float = 1.0,
-      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
-          shape=[None, None, None, 3]),
-      # The followings are for hyper-parameter tuning.
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      # The followings should be kept the same most of the times.
-      output_stride: Optional[int] = None,
-      min_depth: int = 8,
-      # divisible is not used in MobileNetV1.
-      divisible_by: int = 8,
-      stochastic_depth_drop_rate: float = 0.0,
-      regularize_depthwise: bool = False,
-      use_sync_bn: bool = False,
-      # finegrain is not used in MobileNetV1.
-      finegrain_classification_mode: bool = True,
-      output_intermediate_endpoints: bool = False,
-      **kwargs):
-    """Initializes a MobileNet model.
-    Args:
-      model_id: A `str` of MobileNet version. The supported values are
-        `MobileNetV1`, `MobileNetV2`, `MobileNetV3Large`, `MobileNetV3Small`,
-        `MobileNetV3EdgeTPU`, `MobileNetMultiMAX` and `MobileNetMultiAVG`.
-      filter_size_scale: A `float` of multiplier for the filters (number of
-        channels) for all convolution ops. The value must be greater than zero.
-        Typical usage will be to set this value in (0, 1) to reduce the number
-        of parameters or computation cost of the model.
-      input_specs: A `tf.keras.layers.InputSpec` of specs of the input tensor.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A `str` for kernel initializer of convolutional
-        layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      output_stride: An `int` that specifies the requested ratio of input to
-        output spatial resolution. If not None, then we invoke atrous
-        convolution if necessary to prevent the network from reducing the
-        spatial resolution of activation maps. Allowed values are 8 (accurate
-        fully convolutional mode), 16 (fast fully convolutional mode), 32
-        (classification mode).
-      min_depth: An `int` of minimum depth (number of channels) for all
-        convolution ops. Enforced when filter_size_scale < 1, and not an active
-        constraint when filter_size_scale >= 1.
-      divisible_by: An `int` that ensures all inner dimensions are divisible by
-        this number.
-      stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer.
-      regularize_depthwise: If Ture, apply regularization on depthwise.
-      use_sync_bn: If True, use synchronized batch normalization.
-      finegrain_classification_mode: If True, the model will keep the last layer
-        large even for small multipliers, following
-        https://arxiv.org/abs/1801.04381.
-      output_intermediate_endpoints: A `bool` of whether or not output the
-        intermediate endpoints.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    if model_id not in SUPPORTED_SPECS_MAP:
-      raise ValueError('The MobileNet version {} '
-                       'is not supported'.format(model_id))
-    if filter_size_scale <= 0:
-      raise ValueError('filter_size_scale is not greater than zero.')
-    if output_stride is not None:
-      if model_id == 'MobileNetV1':
-        if output_stride not in [8, 16, 32]:
-          raise ValueError('Only allowed output_stride values are 8, 16, 32.')
-      else:
-        if output_stride == 0 or (output_stride > 1 and output_stride % 2):
-          raise ValueError('Output stride must be None, 1 or a multiple of 2.')
-    self._model_id = model_id
-    self._input_specs = input_specs
-    self._filter_size_scale = filter_size_scale
-    self._min_depth = min_depth
-    self._output_stride = output_stride
-    self._divisible_by = divisible_by
-    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
-    self._regularize_depthwise = regularize_depthwise
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    self._finegrain_classification_mode = finegrain_classification_mode
-    self._output_intermediate_endpoints = output_intermediate_endpoints
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    block_specs = SUPPORTED_SPECS_MAP.get(model_id)
-    self._decoded_specs = block_spec_decoder(
-        specs=block_specs,
-        filter_size_scale=self._filter_size_scale,
-        divisible_by=self._get_divisible_by(),
-        finegrain_classification_mode=self._finegrain_classification_mode)
-    x, endpoints, next_endpoint_level = self._mobilenet_base(inputs=inputs)
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    # Don't include the final layer in `self._output_specs` to support decoders.
-    endpoints[str(next_endpoint_level)] = x
-    super(MobileNet, self).__init__(
-        inputs=inputs, outputs=endpoints, **kwargs)
-  def _get_divisible_by(self):
-    if self._model_id == 'MobileNetV1':
-      return 1
-    else:
-      return self._divisible_by
-  def _mobilenet_base(self,
-                      inputs: tf.Tensor
-                      ) -> Tuple[tf.Tensor, Dict[str, tf.Tensor], int]:
-    """Builds the base MobileNet architecture.
-    Args:
-      inputs: A `tf.Tensor` of shape `[batch_size, height, width, channels]`.
-    Returns:
-      A tuple of output Tensor and dictionary that collects endpoints.
-    """
-    input_shape = inputs.get_shape().as_list()
-    if len(input_shape) != 4:
-      raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))
-    # The current_stride variable keeps track of the output stride of the
-    # activations, i.e., the running product of convolution strides up to the
-    # current network layer. This allows us to invoke atrous convolution
-    # whenever applying the next convolution would result in the activations
-    # having output stride larger than the target output_stride.
-    current_stride = 1
-    # The atrous convolution rate parameter.
-    rate = 1
-    net = inputs
-    endpoints = {}
-    endpoint_level = 2
-    for i, block_def in enumerate(self._decoded_specs):
-      block_name = 'block_group_{}_{}'.format(block_def.block_fn, i)
-      # A small catch for gpooling block with None strides
-      if not block_def.strides:
-        block_def.strides = 1
-      if (self._output_stride is not None and
-          current_stride == self._output_stride):
-        # If we have reached the target output_stride, then we need to employ
-        # atrous convolution with stride=1 and multiply the atrous rate by the
-        # current unit's stride for use in subsequent layers.
-        layer_stride = 1
-        layer_rate = rate
-        rate *= block_def.strides
-      else:
-        layer_stride = block_def.strides
-        layer_rate = 1
-        current_stride *= block_def.strides
-      intermediate_endpoints = {}
-      if block_def.block_fn == 'convbn':
-        net = Conv2DBNBlock(
-            filters=block_def.filters,
-            kernel_size=block_def.kernel_size,
-            strides=block_def.strides,
-            activation=block_def.activation,
-            use_bias=block_def.use_bias,
-            use_normalization=block_def.use_normalization,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            bias_regularizer=self._bias_regularizer,
-            use_sync_bn=self._use_sync_bn,
-            norm_momentum=self._norm_momentum,
-            norm_epsilon=self._norm_epsilon
-        )(net)
-      elif block_def.block_fn == 'depsepconv':
-        net = nn_blocks.DepthwiseSeparableConvBlock(
-            filters=block_def.filters,
-            kernel_size=block_def.kernel_size,
-            strides=layer_stride,
-            activation=block_def.activation,
-            dilation_rate=layer_rate,
-            regularize_depthwise=self._regularize_depthwise,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            use_sync_bn=self._use_sync_bn,
-            norm_momentum=self._norm_momentum,
-            norm_epsilon=self._norm_epsilon,
-        )(net)
-      elif block_def.block_fn == 'invertedbottleneck':
-        use_rate = rate
-        if layer_rate > 1 and block_def.kernel_size != 1:
-          # We will apply atrous rate in the following cases:
-          # 1) When kernel_size is not in params, the operation then uses
-          #   default kernel size 3x3.
-          # 2) When kernel_size is in params, and if the kernel_size is not
-          #   equal to (1, 1) (there is no need to apply atrous convolution to
-          #   any 1x1 convolution).
-          use_rate = layer_rate
-        in_filters = net.shape.as_list()[-1]
-        block = nn_blocks.InvertedBottleneckBlock(
-            in_filters=in_filters,
-            out_filters=block_def.filters,
-            kernel_size=block_def.kernel_size,
-            strides=layer_stride,
-            expand_ratio=block_def.expand_ratio,
-            se_ratio=block_def.se_ratio,
-            expand_se_in_filters=True,
-            se_gating_activation='hard_sigmoid',
-            activation=block_def.activation,
-            use_depthwise=block_def.use_depthwise,
-            use_residual=block_def.use_residual,
-            dilation_rate=use_rate,
-            regularize_depthwise=self._regularize_depthwise,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            bias_regularizer=self._bias_regularizer,
-            use_sync_bn=self._use_sync_bn,
-            norm_momentum=self._norm_momentum,
-            norm_epsilon=self._norm_epsilon,
-            stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
-            divisible_by=self._get_divisible_by(),
-            output_intermediate_endpoints=self._output_intermediate_endpoints,
-        )
-        if self._output_intermediate_endpoints:
-          net, intermediate_endpoints = block(net)
-        else:
-          net = block(net)
-      elif block_def.block_fn == 'gpooling':
-        net = layers.GlobalAveragePooling2D()(net)
-        net = layers.Reshape((1, 1, net.shape[1]))(net)
-      else:
-        raise ValueError('Unknown block type {} for layer {}'.format(
-            block_def.block_fn, i))
-      net = tf.keras.layers.Activation('linear', name=block_name)(net)
-      if block_def.is_output:
-        endpoints[str(endpoint_level)] = net
-        for key, tensor in intermediate_endpoints.items():
-          endpoints[str(endpoint_level) + '/' + key] = tensor
-        if current_stride != self._output_stride:
-          endpoint_level += 1
-    if str(endpoint_level) in endpoints:
-      endpoint_level += 1
-    return net, endpoints, endpoint_level
-  def get_config(self):
-    config_dict = {
-        'model_id': self._model_id,
-        'filter_size_scale': self._filter_size_scale,
-        'min_depth': self._min_depth,
-        'output_stride': self._output_stride,
-        'divisible_by': self._divisible_by,
-        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
-        'regularize_depthwise': self._regularize_depthwise,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'finegrain_classification_mode': self._finegrain_classification_mode,
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('mobilenet')
-def build_mobilenet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
-) -> tf.keras.Model:
-  """Builds MobileNet backbone from a config."""
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'mobilenet', (f'Inconsistent backbone type '
-                                        f'{backbone_type}')
-  return MobileNet(
-      model_id=backbone_cfg.model_id,
-      filter_size_scale=backbone_cfg.filter_size_scale,
-      input_specs=input_specs,
-      stochastic_depth_drop_rate=backbone_cfg.stochastic_depth_drop_rate,
-      output_stride=backbone_cfg.output_stride,
-      output_intermediate_endpoints=backbone_cfg.output_intermediate_endpoints,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
--- a/official/vision/modeling/backbones/mobilenet_test.py
+++ b/official/vision/modeling/backbones/mobilenet_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for MobileNet."""
-import itertools
-import math
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-from official.vision.modeling.backbones import mobilenet
-class MobileNetTest(parameterized.TestCase, tf.test.TestCase):
-  @parameterized.parameters(
-      'MobileNetV1',
-      'MobileNetV2',
-      'MobileNetV3Large',
-      'MobileNetV3Small',
-      'MobileNetV3EdgeTPU',
-      'MobileNetMultiAVG',
-      'MobileNetMultiMAX',
-      'MobileNetMultiAVGSeg',
-      'MobileNetMultiMAXSeg',
-      'MobileNetV3SmallReducedFilters',
-  )
-  def test_serialize_deserialize(self, model_id):
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id=model_id,
-        filter_size_scale=1.0,
-        stochastic_depth_drop_rate=None,
-        use_sync_bn=False,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-        bias_regularizer=None,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        output_stride=None,
-        min_depth=8,
-        divisible_by=8,
-        regularize_depthwise=False,
-        finegrain_classification_mode=True
-    )
-    network = mobilenet.MobileNet(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = mobilenet.MobileNet.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-  @parameterized.parameters(
-      itertools.product(
-          [1, 3],
-          [
-              'MobileNetV1',
-              'MobileNetV2',
-              'MobileNetV3Large',
-              'MobileNetV3Small',
-              'MobileNetV3EdgeTPU',
-              'MobileNetMultiAVG',
-              'MobileNetMultiMAX',
-              'MobileNetMultiAVGSeg',
-              'MobileNetMultiMAXSeg',
-              'MobileNetV3SmallReducedFilters',
-          ],
-      ))
-  def test_input_specs(self, input_dim, model_id):
-    """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
-    network = mobilenet.MobileNet(model_id=model_id, input_specs=input_specs)
-    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
-    _ = network(inputs)
-  @parameterized.parameters(
-      itertools.product(
-          [
-              'MobileNetV1',
-              'MobileNetV2',
-              'MobileNetV3Large',
-              'MobileNetV3Small',
-              'MobileNetV3EdgeTPU',
-              'MobileNetMultiAVG',
-              'MobileNetMultiMAX',
-              'MobileNetMultiAVGSeg',
-              'MobileNetV3SmallReducedFilters',
-          ],
-          [32, 224],
-      ))
-  def test_mobilenet_creation(self, model_id,
-                              input_size):
-    """Test creation of MobileNet family models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    mobilenet_layers = {
-        # The number of filters of layers having outputs been collected
-        # for filter_size_scale = 1.0
-        'MobileNetV1': [128, 256, 512, 1024],
-        'MobileNetV2': [24, 32, 96, 320],
-        'MobileNetV3Small': [16, 24, 48, 96],
-        'MobileNetV3Large': [24, 40, 112, 160],
-        'MobileNetV3EdgeTPU': [32, 48, 96, 192],
-        'MobileNetMultiMAX': [32, 64, 128, 160],
-        'MobileNetMultiAVG': [32, 64, 160, 192],
-        'MobileNetMultiAVGSeg': [32, 64, 160, 96],
-        'MobileNetMultiMAXSeg': [32, 64, 128, 96],
-        'MobileNetV3SmallReducedFilters': [16, 24, 48, 48],
-    }
-    network = mobilenet.MobileNet(model_id=model_id,
-                                  filter_size_scale=1.0)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    for idx, num_filter in enumerate(mobilenet_layers[model_id]):
-      self.assertAllEqual(
-          [1, input_size / 2 ** (idx+2), input_size / 2 ** (idx+2), num_filter],
-          endpoints[str(idx+2)].shape.as_list())
-  @parameterized.parameters(
-      itertools.product(
-          [
-              'MobileNetV1',
-              'MobileNetV2',
-              'MobileNetV3Large',
-              'MobileNetV3Small',
-              'MobileNetV3EdgeTPU',
-              'MobileNetMultiAVG',
-              'MobileNetMultiMAX',
-              'MobileNetMultiAVGSeg',
-              'MobileNetMultiMAXSeg',
-              'MobileNetV3SmallReducedFilters',
-          ],
-          [32, 224],
-      ))
-  def test_mobilenet_intermediate_layers(self, model_id, input_size):
-    tf.keras.backend.set_image_data_format('channels_last')
-    # Tests the mobilenet intermediate depthwise layers.
-    mobilenet_depthwise_layers = {
-        # The number of filters of depthwise layers having outputs been
-        # collected for filter_size_scale = 1.0. Only tests the mobilenet
-        # model with inverted bottleneck block using depthwise which excludes
-        # MobileNetV1.
-        'MobileNetV1': [],
-        'MobileNetV2': [144, 192, 576, 960],
-        'MobileNetV3Small': [16, 88, 144, 576],
-        'MobileNetV3Large': [72, 120, 672, 960],
-        'MobileNetV3EdgeTPU': [None, None, 384, 1280],
-        'MobileNetMultiMAX': [96, 128, 384, 640],
-        'MobileNetMultiAVG': [64, 192, 640, 768],
-        'MobileNetMultiAVGSeg': [64, 192, 640, 384],
-        'MobileNetMultiMAXSeg': [96, 128, 384, 320],
-        'MobileNetV3SmallReducedFilters': [16, 88, 144, 288],
-    }
-    network = mobilenet.MobileNet(model_id=model_id,
-                                  filter_size_scale=1.0,
-                                  output_intermediate_endpoints=True)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    for idx, num_filter in enumerate(mobilenet_depthwise_layers[model_id]):
-      # Not using depthwise conv in this layer.
-      if num_filter is None:
-        continue
-      self.assertAllEqual(
-          [1, input_size / 2**(idx + 2), input_size / 2**(idx + 2), num_filter],
-          endpoints[str(idx + 2) + '/depthwise'].shape.as_list())
-  @parameterized.parameters(
-      itertools.product(
-          [
-              'MobileNetV1',
-              'MobileNetV2',
-              'MobileNetV3Large',
-              'MobileNetV3Small',
-              'MobileNetV3EdgeTPU',
-              'MobileNetMultiAVG',
-              'MobileNetMultiMAX',
-              'MobileNetMultiMAX',
-              'MobileNetMultiAVGSeg',
-              'MobileNetMultiMAXSeg',
-              'MobileNetV3SmallReducedFilters',
-          ],
-          [1.0, 0.75],
-      ))
-  def test_mobilenet_scaling(self, model_id,
-                             filter_size_scale):
-    """Test for creation of a MobileNet classifier."""
-    mobilenet_params = {
-        ('MobileNetV1', 1.0): 3228864,
-        ('MobileNetV1', 0.75): 1832976,
-        ('MobileNetV2', 1.0): 2257984,
-        ('MobileNetV2', 0.75): 1382064,
-        ('MobileNetV3Large', 1.0): 4226432,
-        ('MobileNetV3Large', 0.75): 2731616,
-        ('MobileNetV3Small', 1.0): 1529968,
-        ('MobileNetV3Small', 0.75): 1026552,
-        ('MobileNetV3EdgeTPU', 1.0): 2849312,
-        ('MobileNetV3EdgeTPU', 0.75): 1737288,
-        ('MobileNetMultiAVG', 1.0): 3704416,
-        ('MobileNetMultiAVG', 0.75): 2349704,
-        ('MobileNetMultiMAX', 1.0): 3174560,
-        ('MobileNetMultiMAX', 0.75): 2045816,
-        ('MobileNetMultiAVGSeg', 1.0): 2239840,
-        ('MobileNetMultiAVGSeg', 0.75): 1395272,
-        ('MobileNetMultiMAXSeg', 1.0): 1929088,
-        ('MobileNetMultiMAXSeg', 0.75): 1216544,
-        ('MobileNetV3SmallReducedFilters', 1.0): 694880,
-        ('MobileNetV3SmallReducedFilters', 0.75): 505960,
-    }
-    input_size = 224
-    network = mobilenet.MobileNet(model_id=model_id,
-                                  filter_size_scale=filter_size_scale)
-    self.assertEqual(network.count_params(),
-                     mobilenet_params[(model_id, filter_size_scale)])
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    _ = network(inputs)
-  @parameterized.parameters(
-      itertools.product(
-          [
-              'MobileNetV1',
-              'MobileNetV2',
-              'MobileNetV3Large',
-              'MobileNetV3Small',
-              'MobileNetV3EdgeTPU',
-              'MobileNetMultiAVG',
-              'MobileNetMultiMAX',
-              'MobileNetMultiAVGSeg',
-              'MobileNetMultiMAXSeg',
-              'MobileNetV3SmallReducedFilters',
-          ],
-          [8, 16, 32],
-      ))
-  def test_mobilenet_output_stride(self, model_id, output_stride):
-    """Test for creation of a MobileNet with different output strides."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    mobilenet_layers = {
-        # The number of filters of the layers outputs been collected
-        # for filter_size_scale = 1.0.
-        'MobileNetV1': 1024,
-        'MobileNetV2': 320,
-        'MobileNetV3Small': 96,
-        'MobileNetV3Large': 160,
-        'MobileNetV3EdgeTPU': 192,
-        'MobileNetMultiMAX': 160,
-        'MobileNetMultiAVG': 192,
-        'MobileNetMultiAVGSeg': 448,
-        'MobileNetMultiMAXSeg': 448,
-        'MobileNetV3SmallReducedFilters': 48,
-    }
-    network = mobilenet.MobileNet(
-        model_id=model_id, filter_size_scale=1.0, output_stride=output_stride)
-    level = int(math.log2(output_stride))
-    input_size = 224
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    num_filter = mobilenet_layers[model_id]
-    self.assertAllEqual(
-        [1, input_size / output_stride, input_size / output_stride, num_filter],
-        endpoints[str(level)].shape.as_list())
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/resnet.py
+++ b/official/vision/modeling/backbones/resnet.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Contains definitions of ResNet and ResNet-RS models."""
-from typing import Callable, Optional
-# Import libraries
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-layers = tf.keras.layers
-# Specifications for different ResNet variants.
-# Each entry specifies block configurations of the particular ResNet variant.
-# Each element in the block configuration is in the following format:
-# (block_fn, num_filters, block_repeats)
-RESNET_SPECS = {
-    10: [
-        ('residual', 64, 1),
-        ('residual', 128, 1),
-        ('residual', 256, 1),
-        ('residual', 512, 1),
-    ],
-    18: [
-        ('residual', 64, 2),
-        ('residual', 128, 2),
-        ('residual', 256, 2),
-        ('residual', 512, 2),
-    ],
-    34: [
-        ('residual', 64, 3),
-        ('residual', 128, 4),
-        ('residual', 256, 6),
-        ('residual', 512, 3),
-    ],
-    50: [
-        ('bottleneck', 64, 3),
-        ('bottleneck', 128, 4),
-        ('bottleneck', 256, 6),
-        ('bottleneck', 512, 3),
-    ],
-    101: [
-        ('bottleneck', 64, 3),
-        ('bottleneck', 128, 4),
-        ('bottleneck', 256, 23),
-        ('bottleneck', 512, 3),
-    ],
-    152: [
-        ('bottleneck', 64, 3),
-        ('bottleneck', 128, 8),
-        ('bottleneck', 256, 36),
-        ('bottleneck', 512, 3),
-    ],
-    200: [
-        ('bottleneck', 64, 3),
-        ('bottleneck', 128, 24),
-        ('bottleneck', 256, 36),
-        ('bottleneck', 512, 3),
-    ],
-    270: [
-        ('bottleneck', 64, 4),
-        ('bottleneck', 128, 29),
-        ('bottleneck', 256, 53),
-        ('bottleneck', 512, 4),
-    ],
-    350: [
-        ('bottleneck', 64, 4),
-        ('bottleneck', 128, 36),
-        ('bottleneck', 256, 72),
-        ('bottleneck', 512, 4),
-    ],
-    420: [
-        ('bottleneck', 64, 4),
-        ('bottleneck', 128, 44),
-        ('bottleneck', 256, 87),
-        ('bottleneck', 512, 4),
-    ],
-}
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class ResNet(tf.keras.Model):
-  """Creates ResNet and ResNet-RS family models.
-  This implements the Deep Residual Network from:
-    Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
-    Deep Residual Learning for Image Recognition.
-    (https://arxiv.org/pdf/1512.03385) and
-    Irwan Bello, William Fedus, Xianzhi Du, Ekin D. Cubuk, Aravind Srinivas,
-    Tsung-Yi Lin, Jonathon Shlens, Barret Zoph.
-    Revisiting ResNets: Improved Training and Scaling Strategies.
-    (https://arxiv.org/abs/2103.07579).
-  """
-  def __init__(
-      self,
-      model_id: int,
-      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
-          shape=[None, None, None, 3]),
-      depth_multiplier: float = 1.0,
-      stem_type: str = 'v0',
-      resnetd_shortcut: bool = False,
-      replace_stem_max_pool: bool = False,
-      se_ratio: Optional[float] = None,
-      init_stochastic_depth_rate: float = 0.0,
-      scale_stem: bool = True,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bn_trainable: bool = True,
-      **kwargs):
-    """Initializes a ResNet model.
-    Args:
-      model_id: An `int` of the depth of ResNet backbone model.
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      depth_multiplier: A `float` of the depth multiplier to uniformaly scale up
-        all layers in channel size. This argument is also referred to as
-        `width_multiplier` in (https://arxiv.org/abs/2103.07579).
-      stem_type: A `str` of stem type of ResNet. Default to `v0`. If set to
-        `v1`, use ResNet-D type stem (https://arxiv.org/abs/1812.01187).
-      resnetd_shortcut: A `bool` of whether to use ResNet-D shortcut in
-        downsampling blocks.
-      replace_stem_max_pool: A `bool` of whether to replace the max pool in stem
-        with a stride-2 conv,
-      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
-      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
-      scale_stem: A `bool` of whether to scale stem layers.
-      activation: A `str` name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A small `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A str for kernel initializer of convolutional layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      bn_trainable: A `bool` that indicates whether batch norm layers should be
-        trainable. Default to True.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    self._model_id = model_id
-    self._input_specs = input_specs
-    self._depth_multiplier = depth_multiplier
-    self._stem_type = stem_type
-    self._resnetd_shortcut = resnetd_shortcut
-    self._replace_stem_max_pool = replace_stem_max_pool
-    self._se_ratio = se_ratio
-    self._init_stochastic_depth_rate = init_stochastic_depth_rate
-    self._scale_stem = scale_stem
-    self._use_sync_bn = use_sync_bn
-    self._activation = activation
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._bn_trainable = bn_trainable
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      bn_axis = -1
-    else:
-      bn_axis = 1
-    # Build ResNet.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    stem_depth_multiplier = self._depth_multiplier if scale_stem else 1.0
-    if stem_type == 'v0':
-      x = layers.Conv2D(
-          filters=int(64 * stem_depth_multiplier),
-          kernel_size=7,
-          strides=2,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              inputs)
-      x = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          trainable=bn_trainable)(
-              x)
-      x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
-    elif stem_type == 'v1':
-      x = layers.Conv2D(
-          filters=int(32 * stem_depth_multiplier),
-          kernel_size=3,
-          strides=2,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              inputs)
-      x = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          trainable=bn_trainable)(
-              x)
-      x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
-      x = layers.Conv2D(
-          filters=int(32 * stem_depth_multiplier),
-          kernel_size=3,
-          strides=1,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          trainable=bn_trainable)(
-              x)
-      x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
-      x = layers.Conv2D(
-          filters=int(64 * stem_depth_multiplier),
-          kernel_size=3,
-          strides=1,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          trainable=bn_trainable)(
-              x)
-      x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
-    else:
-      raise ValueError('Stem type {} not supported.'.format(stem_type))
-    if replace_stem_max_pool:
-      x = layers.Conv2D(
-          filters=int(64 * self._depth_multiplier),
-          kernel_size=3,
-          strides=2,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          trainable=bn_trainable)(
-              x)
-      x = tf_utils.get_activation(activation, use_keras_layer=True)(x)
-    else:
-      x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
-    endpoints = {}
-    for i, spec in enumerate(RESNET_SPECS[model_id]):
-      if spec[0] == 'residual':
-        block_fn = nn_blocks.ResidualBlock
-      elif spec[0] == 'bottleneck':
-        block_fn = nn_blocks.BottleneckBlock
-      else:
-        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
-      x = self._block_group(
-          inputs=x,
-          filters=int(spec[1] * self._depth_multiplier),
-          strides=(1 if i == 0 else 2),
-          block_fn=block_fn,
-          block_repeats=spec[2],
-          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
-              self._init_stochastic_depth_rate, i + 2, 5),
-          name='block_group_l{}'.format(i + 2))
-      endpoints[str(i + 2)] = x
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    super(ResNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   filters: int,
-                   strides: int,
-                   block_fn: Callable[..., tf.keras.layers.Layer],
-                   block_repeats: int = 1,
-                   stochastic_depth_drop_rate: float = 0.0,
-                   name: str = 'block_group'):
-    """Creates one group of blocks for the ResNet model.
-    Args:
-      inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
-      filters: An `int` number of filters for the first convolution of the
-        layer.
-      strides: An `int` stride to use for the first convolution of the layer.
-        If greater than 1, this layer will downsample the input.
-      block_fn: The type of block group. Either `nn_blocks.ResidualBlock` or
-        `nn_blocks.BottleneckBlock`.
-      block_repeats: An `int` number of blocks contained in the layer.
-      stochastic_depth_drop_rate: A `float` of drop rate of the current block
-        group.
-      name: A `str` name for the block.
-    Returns:
-      The output `tf.Tensor` of the block layer.
-    """
-    x = block_fn(
-        filters=filters,
-        strides=strides,
-        use_projection=True,
-        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-        se_ratio=self._se_ratio,
-        resnetd_shortcut=self._resnetd_shortcut,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer,
-        activation=self._activation,
-        use_sync_bn=self._use_sync_bn,
-        norm_momentum=self._norm_momentum,
-        norm_epsilon=self._norm_epsilon,
-        bn_trainable=self._bn_trainable)(
-            inputs)
-    for _ in range(1, block_repeats):
-      x = block_fn(
-          filters=filters,
-          strides=1,
-          use_projection=False,
-          stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-          se_ratio=self._se_ratio,
-          resnetd_shortcut=self._resnetd_shortcut,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer,
-          activation=self._activation,
-          use_sync_bn=self._use_sync_bn,
-          norm_momentum=self._norm_momentum,
-          norm_epsilon=self._norm_epsilon,
-          bn_trainable=self._bn_trainable)(
-              x)
-    return tf.keras.layers.Activation('linear', name=name)(x)
-  def get_config(self):
-    config_dict = {
-        'model_id': self._model_id,
-        'depth_multiplier': self._depth_multiplier,
-        'stem_type': self._stem_type,
-        'resnetd_shortcut': self._resnetd_shortcut,
-        'replace_stem_max_pool': self._replace_stem_max_pool,
-        'activation': self._activation,
-        'se_ratio': self._se_ratio,
-        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
-        'scale_stem': self._scale_stem,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'bn_trainable': self._bn_trainable
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('resnet')
-def build_resnet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
-  """Builds ResNet backbone from a config."""
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'resnet', (f'Inconsistent backbone type '
-                                     f'{backbone_type}')
-  return ResNet(
-      model_id=backbone_cfg.model_id,
-      input_specs=input_specs,
-      depth_multiplier=backbone_cfg.depth_multiplier,
-      stem_type=backbone_cfg.stem_type,
-      resnetd_shortcut=backbone_cfg.resnetd_shortcut,
-      replace_stem_max_pool=backbone_cfg.replace_stem_max_pool,
-      se_ratio=backbone_cfg.se_ratio,
-      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
-      scale_stem=backbone_cfg.scale_stem,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer,
-      bn_trainable=backbone_cfg.bn_trainable)
--- a/official/vision/modeling/backbones/resnet_3d.py
+++ b/official/vision/modeling/backbones/resnet_3d.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Contains definitions of 3D Residual Networks."""
-from typing import Callable, List, Tuple, Optional
-# Import libraries
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks_3d
-from official.vision.modeling.layers import nn_layers
-layers = tf.keras.layers
-RESNET_SPECS = {
-    50: [
-        ('bottleneck3d', 64, 3),
-        ('bottleneck3d', 128, 4),
-        ('bottleneck3d', 256, 6),
-        ('bottleneck3d', 512, 3),
-    ],
-    101: [
-        ('bottleneck3d', 64, 3),
-        ('bottleneck3d', 128, 4),
-        ('bottleneck3d', 256, 23),
-        ('bottleneck3d', 512, 3),
-    ],
-    152: [
-        ('bottleneck3d', 64, 3),
-        ('bottleneck3d', 128, 8),
-        ('bottleneck3d', 256, 36),
-        ('bottleneck3d', 512, 3),
-    ],
-    200: [
-        ('bottleneck3d', 64, 3),
-        ('bottleneck3d', 128, 24),
-        ('bottleneck3d', 256, 36),
-        ('bottleneck3d', 512, 3),
-    ],
-    270: [
-        ('bottleneck3d', 64, 4),
-        ('bottleneck3d', 128, 29),
-        ('bottleneck3d', 256, 53),
-        ('bottleneck3d', 512, 4),
-    ],
-    300: [
-        ('bottleneck3d', 64, 4),
-        ('bottleneck3d', 128, 36),
-        ('bottleneck3d', 256, 54),
-        ('bottleneck3d', 512, 4),
-    ],
-    350: [
-        ('bottleneck3d', 64, 4),
-        ('bottleneck3d', 128, 36),
-        ('bottleneck3d', 256, 72),
-        ('bottleneck3d', 512, 4),
-    ],
-}
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class ResNet3D(tf.keras.Model):
-  """Creates a 3D ResNet family model."""
-  def __init__(
-      self,
-      model_id: int,
-      temporal_strides: List[int],
-      temporal_kernel_sizes: List[Tuple[int]],
-      use_self_gating: Optional[List[int]] = None,
-      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
-          shape=[None, None, None, None, 3]),
-      stem_type: str = 'v0',
-      stem_conv_temporal_kernel_size: int = 5,
-      stem_conv_temporal_stride: int = 2,
-      stem_pool_temporal_stride: int = 2,
-      init_stochastic_depth_rate: float = 0.0,
-      activation: str = 'relu',
-      se_ratio: Optional[float] = None,
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a 3D ResNet model.
-    Args:
-      model_id: An `int` of depth of ResNet backbone model.
-      temporal_strides: A list of integers that specifies the temporal strides
-        for all 3d blocks.
-      temporal_kernel_sizes: A list of tuples that specifies the temporal kernel
-        sizes for all 3d blocks in different block groups.
-      use_self_gating: A list of booleans to specify applying self-gating module
-        or not in each block group. If None, self-gating is not applied.
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      stem_type: A `str` of stem type of ResNet. Default to `v0`. If set to
-        `v1`, use ResNet-D type stem (https://arxiv.org/abs/1812.01187).
-      stem_conv_temporal_kernel_size: An `int` of temporal kernel size for the
-        first conv layer.
-      stem_conv_temporal_stride: An `int` of temporal stride for the first conv
-        layer.
-      stem_pool_temporal_stride: An `int` of temporal stride for the first pool
-        layer.
-      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
-      activation: A `str` of name of the activation function.
-      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A str for kernel initializer of convolutional layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    self._model_id = model_id
-    self._temporal_strides = temporal_strides
-    self._temporal_kernel_sizes = temporal_kernel_sizes
-    self._input_specs = input_specs
-    self._stem_type = stem_type
-    self._stem_conv_temporal_kernel_size = stem_conv_temporal_kernel_size
-    self._stem_conv_temporal_stride = stem_conv_temporal_stride
-    self._stem_pool_temporal_stride = stem_pool_temporal_stride
-    self._use_self_gating = use_self_gating
-    self._se_ratio = se_ratio
-    self._init_stochastic_depth_rate = init_stochastic_depth_rate
-    self._use_sync_bn = use_sync_bn
-    self._activation = activation
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      bn_axis = -1
-    else:
-      bn_axis = 1
-    # Build ResNet3D backbone.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    # Build stem.
-    if stem_type == 'v0':
-      x = layers.Conv3D(
-          filters=64,
-          kernel_size=[stem_conv_temporal_kernel_size, 7, 7],
-          strides=[stem_conv_temporal_stride, 2, 2],
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              inputs)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-    elif stem_type == 'v1':
-      x = layers.Conv3D(
-          filters=32,
-          kernel_size=[stem_conv_temporal_kernel_size, 3, 3],
-          strides=[stem_conv_temporal_stride, 2, 2],
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              inputs)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-      x = layers.Conv3D(
-          filters=32,
-          kernel_size=[1, 3, 3],
-          strides=[1, 1, 1],
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-      x = layers.Conv3D(
-          filters=64,
-          kernel_size=[1, 3, 3],
-          strides=[1, 1, 1],
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-    else:
-      raise ValueError(f'Stem type {stem_type} not supported.')
-    temporal_kernel_size = 1 if stem_pool_temporal_stride == 1 else 3
-    x = layers.MaxPool3D(
-        pool_size=[temporal_kernel_size, 3, 3],
-        strides=[stem_pool_temporal_stride, 2, 2],
-        padding='same')(
-            x)
-    # Build intermediate blocks and endpoints.
-    resnet_specs = RESNET_SPECS[model_id]
-    if len(temporal_strides) != len(resnet_specs) or len(
-        temporal_kernel_sizes) != len(resnet_specs):
-      raise ValueError(
-          'Number of blocks in temporal specs should equal to resnet_specs.')
-    endpoints = {}
-    for i, resnet_spec in enumerate(resnet_specs):
-      if resnet_spec[0] == 'bottleneck3d':
-        block_fn = nn_blocks_3d.BottleneckBlock3D
-      else:
-        raise ValueError('Block fn `{}` is not supported.'.format(
-            resnet_spec[0]))
-      x = self._block_group(
-          inputs=x,
-          filters=resnet_spec[1],
-          temporal_kernel_sizes=temporal_kernel_sizes[i],
-          temporal_strides=temporal_strides[i],
-          spatial_strides=(1 if i == 0 else 2),
-          block_fn=block_fn,
-          block_repeats=resnet_spec[2],
-          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
-              self._init_stochastic_depth_rate, i + 2, 5),
-          use_self_gating=use_self_gating[i] if use_self_gating else False,
-          name='block_group_l{}'.format(i + 2))
-      endpoints[str(i + 2)] = x
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    super(ResNet3D, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   filters: int,
-                   temporal_kernel_sizes: Tuple[int],
-                   temporal_strides: int,
-                   spatial_strides: int,
-                   block_fn: Callable[
-                       ...,
-                       tf.keras.layers.Layer] = nn_blocks_3d.BottleneckBlock3D,
-                   block_repeats: int = 1,
-                   stochastic_depth_drop_rate: float = 0.0,
-                   use_self_gating: bool = False,
-                   name: str = 'block_group'):
-    """Creates one group of blocks for the ResNet3D model.
-    Args:
-      inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
-      filters: An `int` of number of filters for the first convolution of the
-        layer.
-      temporal_kernel_sizes: A tuple that specifies the temporal kernel sizes
-        for each block in the current group.
-      temporal_strides: An `int` of temporal strides for the first convolution
-        in this group.
-      spatial_strides: An `int` stride to use for the first convolution of the
-        layer. If greater than 1, this layer will downsample the input.
-      block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
-      block_repeats: An `int` of number of blocks contained in the layer.
-      stochastic_depth_drop_rate: A `float` of drop rate of the current block
-        group.
-      use_self_gating: A `bool` that specifies whether to apply self-gating
-        module or not.
-      name: A `str` name for the block.
-    Returns:
-      The output `tf.Tensor` of the block layer.
-    """
-    if len(temporal_kernel_sizes) != block_repeats:
-      raise ValueError(
-          'Number of elements in `temporal_kernel_sizes` must equal to `block_repeats`.'
-      )
-    # Only apply self-gating module in the last block.
-    use_self_gating_list = [False] * (block_repeats - 1) + [use_self_gating]
-    x = block_fn(
-        filters=filters,
-        temporal_kernel_size=temporal_kernel_sizes[0],
-        temporal_strides=temporal_strides,
-        spatial_strides=spatial_strides,
-        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-        use_self_gating=use_self_gating_list[0],
-        se_ratio=self._se_ratio,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer,
-        activation=self._activation,
-        use_sync_bn=self._use_sync_bn,
-        norm_momentum=self._norm_momentum,
-        norm_epsilon=self._norm_epsilon)(
-            inputs)
-    for i in range(1, block_repeats):
-      x = block_fn(
-          filters=filters,
-          temporal_kernel_size=temporal_kernel_sizes[i],
-          temporal_strides=1,
-          spatial_strides=1,
-          stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-          use_self_gating=use_self_gating_list[i],
-          se_ratio=self._se_ratio,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer,
-          activation=self._activation,
-          use_sync_bn=self._use_sync_bn,
-          norm_momentum=self._norm_momentum,
-          norm_epsilon=self._norm_epsilon)(
-              x)
-    return tf.identity(x, name=name)
-  def get_config(self):
-    config_dict = {
-        'model_id': self._model_id,
-        'temporal_strides': self._temporal_strides,
-        'temporal_kernel_sizes': self._temporal_kernel_sizes,
-        'stem_type': self._stem_type,
-        'stem_conv_temporal_kernel_size': self._stem_conv_temporal_kernel_size,
-        'stem_conv_temporal_stride': self._stem_conv_temporal_stride,
-        'stem_pool_temporal_stride': self._stem_pool_temporal_stride,
-        'use_self_gating': self._use_self_gating,
-        'se_ratio': self._se_ratio,
-        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('resnet_3d')
-def build_resnet3d(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
-) -> tf.keras.Model:
-  """Builds ResNet 3d backbone from a config."""
-  backbone_cfg = backbone_config.get()
-  # Flatten configs before passing to the backbone.
-  temporal_strides = []
-  temporal_kernel_sizes = []
-  use_self_gating = []
-  for block_spec in backbone_cfg.block_specs:
-    temporal_strides.append(block_spec.temporal_strides)
-    temporal_kernel_sizes.append(block_spec.temporal_kernel_sizes)
-    use_self_gating.append(block_spec.use_self_gating)
-  return ResNet3D(
-      model_id=backbone_cfg.model_id,
-      temporal_strides=temporal_strides,
-      temporal_kernel_sizes=temporal_kernel_sizes,
-      use_self_gating=use_self_gating,
-      input_specs=input_specs,
-      stem_type=backbone_cfg.stem_type,
-      stem_conv_temporal_kernel_size=backbone_cfg
-      .stem_conv_temporal_kernel_size,
-      stem_conv_temporal_stride=backbone_cfg.stem_conv_temporal_stride,
-      stem_pool_temporal_stride=backbone_cfg.stem_pool_temporal_stride,
-      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
-      se_ratio=backbone_cfg.se_ratio,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
-@factory.register_backbone_builder('resnet_3d_rs')
-def build_resnet3d_rs(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
-) -> tf.keras.Model:
-  """Builds ResNet-3D-RS backbone from a config."""
-  backbone_cfg = backbone_config.get()
-  # Flatten configs before passing to the backbone.
-  temporal_strides = []
-  temporal_kernel_sizes = []
-  use_self_gating = []
-  for i, block_spec in enumerate(backbone_cfg.block_specs):
-    temporal_strides.append(block_spec.temporal_strides)
-    use_self_gating.append(block_spec.use_self_gating)
-    block_repeats_i = RESNET_SPECS[backbone_cfg.model_id][i][-1]
-    temporal_kernel_sizes.append(list(block_spec.temporal_kernel_sizes) *
-                                 block_repeats_i)
-  return ResNet3D(
-      model_id=backbone_cfg.model_id,
-      temporal_strides=temporal_strides,
-      temporal_kernel_sizes=temporal_kernel_sizes,
-      use_self_gating=use_self_gating,
-      input_specs=input_specs,
-      stem_type=backbone_cfg.stem_type,
-      stem_conv_temporal_kernel_size=backbone_cfg
-      .stem_conv_temporal_kernel_size,
-      stem_conv_temporal_stride=backbone_cfg.stem_conv_temporal_stride,
-      stem_pool_temporal_stride=backbone_cfg.stem_pool_temporal_stride,
-      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
-      se_ratio=backbone_cfg.se_ratio,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
--- a/official/vision/modeling/backbones/resnet_3d_test.py
+++ b/official/vision/modeling/backbones/resnet_3d_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for resnet."""
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-from official.vision.modeling.backbones import resnet_3d
-class ResNet3DTest(parameterized.TestCase, tf.test.TestCase):
-  @parameterized.parameters(
-      (128, 50, 4, 'v0', False, 0.0),
-      (128, 50, 4, 'v1', False, 0.2),
-      (256, 50, 4, 'v1', True, 0.2),
-  )
-  def test_network_creation(self, input_size, model_id, endpoint_filter_scale,
-                            stem_type, se_ratio, init_stochastic_depth_rate):
-    """Test creation of ResNet3D family models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    temporal_strides = [1, 1, 1, 1]
-    temporal_kernel_sizes = [(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1),
-                             (1, 3, 1)]
-    use_self_gating = [True, False, True, False]
-    network = resnet_3d.ResNet3D(
-        model_id=model_id,
-        temporal_strides=temporal_strides,
-        temporal_kernel_sizes=temporal_kernel_sizes,
-        use_self_gating=use_self_gating,
-        stem_type=stem_type,
-        se_ratio=se_ratio,
-        init_stochastic_depth_rate=init_stochastic_depth_rate)
-    inputs = tf.keras.Input(shape=(8, input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    self.assertAllEqual([
-        1, 2, input_size / 2**2, input_size / 2**2, 64 * endpoint_filter_scale
-    ], endpoints['2'].shape.as_list())
-    self.assertAllEqual([
-        1, 2, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale
-    ], endpoints['3'].shape.as_list())
-    self.assertAllEqual([
-        1, 2, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale
-    ], endpoints['4'].shape.as_list())
-    self.assertAllEqual([
-        1, 2, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale
-    ], endpoints['5'].shape.as_list())
-  def test_serialize_deserialize(self):
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id=50,
-        temporal_strides=[1, 1, 1, 1],
-        temporal_kernel_sizes=[(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1),
-                               (1, 3, 1)],
-        stem_type='v0',
-        stem_conv_temporal_kernel_size=5,
-        stem_conv_temporal_stride=2,
-        stem_pool_temporal_stride=2,
-        se_ratio=0.0,
-        use_self_gating=None,
-        init_stochastic_depth_rate=0.0,
-        use_sync_bn=False,
-        activation='relu',
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    network = resnet_3d.ResNet3D(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = resnet_3d.ResNet3D.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/resnet_deeplab.py
+++ b/official/vision/modeling/backbones/resnet_deeplab.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Contains definitions of Residual Networks with Deeplab modifications."""
-from typing import Callable, Optional, Tuple, List
-import numpy as np
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-layers = tf.keras.layers
-# Specifications for different ResNet variants.
-# Each entry specifies block configurations of the particular ResNet variant.
-# Each element in the block configuration is in the following format:
-# (block_fn, num_filters, block_repeats)
-RESNET_SPECS = {
-    50: [
-        ('bottleneck', 64, 3),
-        ('bottleneck', 128, 4),
-        ('bottleneck', 256, 6),
-        ('bottleneck', 512, 3),
-    ],
-    101: [
-        ('bottleneck', 64, 3),
-        ('bottleneck', 128, 4),
-        ('bottleneck', 256, 23),
-        ('bottleneck', 512, 3),
-    ],
-}
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class DilatedResNet(tf.keras.Model):
-  """Creates a ResNet model with Deeplabv3 modifications.
-  This backbone is suitable for semantic segmentation. This implements
-    Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam.
-    Rethinking Atrous Convolution for Semantic Image Segmentation.
-    (https://arxiv.org/pdf/1706.05587)
-  """
-  def __init__(
-      self,
-      model_id: int,
-      output_stride: int,
-      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
-          shape=[None, None, None, 3]),
-      stem_type: str = 'v0',
-      se_ratio: Optional[float] = None,
-      init_stochastic_depth_rate: float = 0.0,
-      multigrid: Optional[Tuple[int]] = None,
-      last_stage_repeats: int = 1,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a ResNet model with DeepLab modification.
-    Args:
-      model_id: An `int` specifies depth of ResNet backbone model.
-      output_stride: An `int` of output stride, ratio of input to output
-        resolution.
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      stem_type: A `str` of stem type. Can be `v0` or `v1`. `v1` replaces 7x7
-        conv by 3 3x3 convs.
-      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
-      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
-      multigrid: A tuple of the same length as the number of blocks in the last
-        resnet stage.
-      last_stage_repeats: An `int` that specifies how many times last stage is
-        repeated.
-      activation: A `str` name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A str for kernel initializer of convolutional layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    self._model_id = model_id
-    self._output_stride = output_stride
-    self._input_specs = input_specs
-    self._use_sync_bn = use_sync_bn
-    self._activation = activation
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._stem_type = stem_type
-    self._se_ratio = se_ratio
-    self._init_stochastic_depth_rate = init_stochastic_depth_rate
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      bn_axis = -1
-    else:
-      bn_axis = 1
-    # Build ResNet.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    if stem_type == 'v0':
-      x = layers.Conv2D(
-          filters=64,
-          kernel_size=7,
-          strides=2,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              inputs)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-    elif stem_type == 'v1':
-      x = layers.Conv2D(
-          filters=64,
-          kernel_size=3,
-          strides=2,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              inputs)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-      x = layers.Conv2D(
-          filters=64,
-          kernel_size=3,
-          strides=1,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-      x = layers.Conv2D(
-          filters=128,
-          kernel_size=3,
-          strides=1,
-          use_bias=False,
-          padding='same',
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(activation)(x)
-    else:
-      raise ValueError('Stem type {} not supported.'.format(stem_type))
-    x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
-    normal_resnet_stage = int(np.math.log2(self._output_stride)) - 2
-    endpoints = {}
-    for i in range(normal_resnet_stage + 1):
-      spec = RESNET_SPECS[model_id][i]
-      if spec[0] == 'bottleneck':
-        block_fn = nn_blocks.BottleneckBlock
-      else:
-        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
-      x = self._block_group(
-          inputs=x,
-          filters=spec[1],
-          strides=(1 if i == 0 else 2),
-          dilation_rate=1,
-          block_fn=block_fn,
-          block_repeats=spec[2],
-          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
-              self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
-          name='block_group_l{}'.format(i + 2))
-      endpoints[str(i + 2)] = x
-    dilation_rate = 2
-    for i in range(normal_resnet_stage + 1, 3 + last_stage_repeats):
-      spec = RESNET_SPECS[model_id][i] if i < 3 else RESNET_SPECS[model_id][-1]
-      if spec[0] == 'bottleneck':
-        block_fn = nn_blocks.BottleneckBlock
-      else:
-        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
-      x = self._block_group(
-          inputs=x,
-          filters=spec[1],
-          strides=1,
-          dilation_rate=dilation_rate,
-          block_fn=block_fn,
-          block_repeats=spec[2],
-          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
-              self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
-          multigrid=multigrid if i >= 3 else None,
-          name='block_group_l{}'.format(i + 2))
-      dilation_rate *= 2
-    endpoints[str(normal_resnet_stage + 2)] = x
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    super(DilatedResNet, self).__init__(
-        inputs=inputs, outputs=endpoints, **kwargs)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   filters: int,
-                   strides: int,
-                   dilation_rate: int,
-                   block_fn: Callable[..., tf.keras.layers.Layer],
-                   block_repeats: int = 1,
-                   stochastic_depth_drop_rate: float = 0.0,
-                   multigrid: Optional[List[int]] = None,
-                   name: str = 'block_group'):
-    """Creates one group of blocks for the ResNet model.
-    Deeplab applies strides at the last block.
-    Args:
-      inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
-      filters: An `int` off number of filters for the first convolution of the
-        layer.
-      strides: An `int` of stride to use for the first convolution of the layer.
-        If greater than 1, this layer will downsample the input.
-      dilation_rate: An `int` of diluted convolution rates.
-      block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
-      block_repeats: An `int` of number of blocks contained in the layer.
-      stochastic_depth_drop_rate: A `float` of drop rate of the current block
-        group.
-      multigrid: A list of `int` or None. If specified, dilation rates for each
-        block is scaled up by its corresponding factor in the multigrid.
-      name: A `str` name for the block.
-    Returns:
-      The output `tf.Tensor` of the block layer.
-    """
-    if multigrid is not None and len(multigrid) != block_repeats:
-      raise ValueError('multigrid has to match number of block_repeats')
-    if multigrid is None:
-      multigrid = [1] * block_repeats
-    # TODO(arashwan): move striding at the of the block.
-    x = block_fn(
-        filters=filters,
-        strides=strides,
-        dilation_rate=dilation_rate * multigrid[0],
-        use_projection=True,
-        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-        se_ratio=self._se_ratio,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer,
-        activation=self._activation,
-        use_sync_bn=self._use_sync_bn,
-        norm_momentum=self._norm_momentum,
-        norm_epsilon=self._norm_epsilon)(
-            inputs)
-    for i in range(1, block_repeats):
-      x = block_fn(
-          filters=filters,
-          strides=1,
-          dilation_rate=dilation_rate * multigrid[i],
-          use_projection=False,
-          stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-          se_ratio=self._se_ratio,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer,
-          activation=self._activation,
-          use_sync_bn=self._use_sync_bn,
-          norm_momentum=self._norm_momentum,
-          norm_epsilon=self._norm_epsilon)(
-              x)
-    return tf.identity(x, name=name)
-  def get_config(self):
-    config_dict = {
-        'model_id': self._model_id,
-        'output_stride': self._output_stride,
-        'stem_type': self._stem_type,
-        'se_ratio': self._se_ratio,
-        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    """Builds a model by passing `config` as keyword arguments to `cls`.
-    Args:
-      config: A dict of constructor keyword arguments.
-      custom_objects: Accepted but not used by this implementation.
-    Returns:
-      A new instance of `cls`.
-    """
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output.
-    Keys are the endpoint levels as strings (e.g. '2', '3').
-    """
-    return self._output_specs
-@factory.register_backbone_builder('dilated_resnet')
-def build_dilated_resnet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
-  """Builds ResNet backbone from a config."""
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'dilated_resnet', (f'Inconsistent backbone type '
-                                             f'{backbone_type}')
-  return DilatedResNet(
-      model_id=backbone_cfg.model_id,
-      output_stride=backbone_cfg.output_stride,
-      input_specs=input_specs,
-      stem_type=backbone_cfg.stem_type,
-      se_ratio=backbone_cfg.se_ratio,
-      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
-      multigrid=backbone_cfg.multigrid,
-      last_stage_repeats=backbone_cfg.last_stage_repeats,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
--- a/official/vision/modeling/backbones/resnet_deeplab_test.py
+++ b/official/vision/modeling/backbones/resnet_deeplab_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for resnet_deeplab models."""
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-from tensorflow.python.distribute import combinations
-from tensorflow.python.distribute import strategy_combinations
-from official.vision.modeling.backbones import resnet_deeplab
-class ResNetTest(parameterized.TestCase, tf.test.TestCase):
-  @parameterized.parameters(
-      (128, 50, 4, 8),
-      (128, 101, 4, 8),
-      (128, 50, 4, 16),
-      (128, 101, 4, 16),
-  )
-  def test_network_creation(self, input_size, model_id,
-                            endpoint_filter_scale, output_stride):
-    """Test creation of ResNet models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    network = resnet_deeplab.DilatedResNet(model_id=model_id,
-                                           output_stride=output_stride)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    print(endpoints)
-    self.assertAllEqual([
-        1, input_size / output_stride, input_size / output_stride,
-        512 * endpoint_filter_scale
-    ], endpoints[str(int(np.math.log2(output_stride)))].shape.as_list())
-  @parameterized.parameters(
-      ('v0', None, 0.0),
-      ('v1', None, 0.0),
-      ('v1', 0.25, 0.0),
-      ('v1', 0.25, 0.2),
-  )
-  def test_network_features(self, stem_type, se_ratio,
-                            init_stochastic_depth_rate):
-    """Test additional features of ResNet models."""
-    input_size = 128
-    model_id = 50
-    endpoint_filter_scale = 4
-    output_stride = 8
-    tf.keras.backend.set_image_data_format('channels_last')
-    network = resnet_deeplab.DilatedResNet(
-        model_id=model_id,
-        output_stride=output_stride,
-        stem_type=stem_type,
-        se_ratio=se_ratio,
-        init_stochastic_depth_rate=init_stochastic_depth_rate)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    print(endpoints)
-    self.assertAllEqual([
-        1, input_size / output_stride, input_size / output_stride,
-        512 * endpoint_filter_scale
-    ], endpoints[str(int(np.math.log2(output_stride)))].shape.as_list())
-  @combinations.generate(
-      combinations.combine(
-          strategy=[
-              strategy_combinations.cloud_tpu_strategy,
-              strategy_combinations.one_device_strategy_gpu,
-          ],
-          use_sync_bn=[False, True],
-      ))
-  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
-    """Test for sync bn on TPU and GPU devices."""
-    inputs = np.random.rand(64, 128, 128, 3)
-    tf.keras.backend.set_image_data_format('channels_last')
-    with strategy.scope():
-      network = resnet_deeplab.DilatedResNet(
-          model_id=50, output_stride=8, use_sync_bn=use_sync_bn)
-      _ = network(inputs)
-  @parameterized.parameters(1, 3, 4)
-  def test_input_specs(self, input_dim):
-    """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
-    network = resnet_deeplab.DilatedResNet(
-        model_id=50, output_stride=8, input_specs=input_specs)
-    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
-    _ = network(inputs)
-  def test_serialize_deserialize(self):
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id=50,
-        output_stride=8,
-        stem_type='v0',
-        se_ratio=0.25,
-        init_stochastic_depth_rate=0.2,
-        use_sync_bn=False,
-        activation='relu',
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    network = resnet_deeplab.DilatedResNet(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = resnet_deeplab.DilatedResNet.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/resnet_test.py
+++ b/official/vision/modeling/backbones/resnet_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for resnet."""
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-from tensorflow.python.distribute import combinations
-from tensorflow.python.distribute import strategy_combinations
-from official.vision.modeling.backbones import resnet
-class ResNetTest(parameterized.TestCase, tf.test.TestCase):
-  @parameterized.parameters(
-      (128, 10, 1),
-      (128, 18, 1),
-      (128, 34, 1),
-      (128, 50, 4),
-      (128, 101, 4),
-      (128, 152, 4),
-  )
-  def test_network_creation(self, input_size, model_id,
-                            endpoint_filter_scale):
-    """Test creation of ResNet family models."""
-    resnet_params = {
-        10: 4915904,
-        18: 11190464,
-        34: 21306048,
-        50: 23561152,
-        101: 42605504,
-        152: 58295232,
-    }
-    tf.keras.backend.set_image_data_format('channels_last')
-    network = resnet.ResNet(model_id=model_id)
-    self.assertEqual(network.count_params(), resnet_params[model_id])
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    self.assertAllEqual(
-        [1, input_size / 2**2, input_size / 2**2, 64 * endpoint_filter_scale],
-        endpoints['2'].shape.as_list())
-    self.assertAllEqual(
-        [1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale],
-        endpoints['3'].shape.as_list())
-    self.assertAllEqual(
-        [1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale],
-        endpoints['4'].shape.as_list())
-    self.assertAllEqual(
-        [1, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale],
-        endpoints['5'].shape.as_list())
-  @combinations.generate(
-      combinations.combine(
-          strategy=[
-              strategy_combinations.cloud_tpu_strategy,
-              strategy_combinations.one_device_strategy_gpu,
-          ],
-          use_sync_bn=[False, True],
-      ))
-  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
-    """Test for sync bn on TPU and GPU devices."""
-    inputs = np.random.rand(64, 128, 128, 3)
-    tf.keras.backend.set_image_data_format('channels_last')
-    with strategy.scope():
-      network = resnet.ResNet(model_id=50, use_sync_bn=use_sync_bn)
-      _ = network(inputs)
-  @parameterized.parameters(
-      (128, 34, 1, 'v0', None, 0.0, 1.0, False, False),
-      (128, 34, 1, 'v1', 0.25, 0.2, 1.25, True, True),
-      (128, 50, 4, 'v0', None, 0.0, 1.5, False, False),
-      (128, 50, 4, 'v1', 0.25, 0.2, 2.0, True, True),
-  )
-  def test_resnet_rs(self, input_size, model_id, endpoint_filter_scale,
-                     stem_type, se_ratio, init_stochastic_depth_rate,
-                     depth_multiplier, resnetd_shortcut, replace_stem_max_pool):
-    """Test creation of ResNet family models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    network = resnet.ResNet(
-        model_id=model_id,
-        depth_multiplier=depth_multiplier,
-        stem_type=stem_type,
-        resnetd_shortcut=resnetd_shortcut,
-        replace_stem_max_pool=replace_stem_max_pool,
-        se_ratio=se_ratio,
-        init_stochastic_depth_rate=init_stochastic_depth_rate)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    _ = network(inputs)
-  @parameterized.parameters(1, 3, 4)
-  def test_input_specs(self, input_dim):
-    """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
-    network = resnet.ResNet(model_id=50, input_specs=input_specs)
-    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
-    _ = network(inputs)
-  def test_serialize_deserialize(self):
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id=50,
-        depth_multiplier=1.0,
-        stem_type='v0',
-        se_ratio=None,
-        resnetd_shortcut=False,
-        replace_stem_max_pool=False,
-        init_stochastic_depth_rate=0.0,
-        scale_stem=True,
-        use_sync_bn=False,
-        activation='relu',
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-        bias_regularizer=None,
-        bn_trainable=True)
-    network = resnet.ResNet(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = resnet.ResNet.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/revnet.py
+++ b/official/vision/modeling/backbones/revnet.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Contains definitions of RevNet."""
-from typing import Any, Callable, Dict, Optional
-# Import libraries
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-# Specifications for different RevNet variants, keyed by `model_id`.
-# Each entry specifies block configurations of the particular RevNet variant;
-# one block group is built per element, in order.
-# Each element in the block configuration is in the following format:
-# (block_fn, num_filters, block_repeats)
-# `block_fn` is either 'residual' or 'bottleneck'. `num_filters` must be even
-# because the reversible blocks use `num_filters // 2` filters per inner block.
-REVNET_SPECS = {
-    38: [
-        ('residual', 32, 3),
-        ('residual', 64, 3),
-        ('residual', 112, 3),
-    ],
-    56: [
-        ('bottleneck', 128, 2),
-        ('bottleneck', 256, 2),
-        ('bottleneck', 512, 3),
-        ('bottleneck', 832, 2),
-    ],
-    104: [
-        ('bottleneck', 128, 2),
-        ('bottleneck', 256, 2),
-        ('bottleneck', 512, 11),
-        ('bottleneck', 832, 2),
-    ],
-}
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class RevNet(tf.keras.Model):
-  """Creates a Reversible ResNet (RevNet) family model.
-  This implements:
-    Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
-    The Reversible Residual Network: Backpropagation Without Storing
-    Activations.
-    (https://arxiv.org/pdf/1707.04585.pdf)
-  The model outputs a dict of endpoints keyed by the level as a string,
-  starting at '2', with one endpoint per block group in `REVNET_SPECS`.
-  """
-  def __init__(
-      self,
-      model_id: int,
-      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
-          shape=[None, None, None, 3]),
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a RevNet model.
-    Args:
-      model_id: An `int` of depth/id of ResNet backbone model. Must be a key of
-        `REVNET_SPECS`.
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      activation: A `str` name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A str for kernel initializer of convolutional layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      **kwargs: Additional keyword arguments to be passed.
-    Raises:
-      ValueError: If a block spec names an unsupported block function or has
-        an odd number of filters.
-    """
-    # Keep the constructor arguments so `get_config` can report them.
-    self._model_id = model_id
-    self._input_specs = input_specs
-    self._use_sync_bn = use_sync_bn
-    self._activation = activation
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    if use_sync_bn:
-      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = tf.keras.layers.BatchNormalization
-    # Normalization runs over the channel axis of the active data format.
-    axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1
-    # Build RevNet.
-    # The graph is assembled functionally first; the Keras model itself is
-    # initialized from `inputs` and `endpoints` at the end of this method.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    # Stem: 7x7 stride-2 convolution, normalization, activation and a
-    # stride-2 max pool.
-    x = tf.keras.layers.Conv2D(
-        filters=REVNET_SPECS[model_id][0][1],
-        kernel_size=7, strides=2, use_bias=False, padding='same',
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer)(inputs)
-    x = self._norm(
-        axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)
-    x = tf_utils.get_activation(activation)(x)
-    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
-    endpoints = {}
-    for i, spec in enumerate(REVNET_SPECS[model_id]):
-      if spec[0] == 'residual':
-        inner_block_fn = nn_blocks.ResidualInner
-      elif spec[0] == 'bottleneck':
-        inner_block_fn = nn_blocks.BottleneckResidualInner
-      else:
-        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))
-      if spec[1] % 2 != 0:
-        raise ValueError('Number of output filters must be even to ensure '
-                         'splitting in channel dimension for reversible blocks')
-      # The first group keeps the resolution; later groups downsample by 2.
-      x = self._block_group(
-          inputs=x,
-          filters=spec[1],
-          strides=(1 if i == 0 else 2),
-          inner_block_fn=inner_block_fn,
-          block_repeats=spec[2],
-          batch_norm_first=(i != 0),  # Only skip on first block
-          name='revblock_group_{}'.format(i + 2))
-      # Endpoint keys are level strings, starting at '2'.
-      endpoints[str(i + 2)] = x
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    super(RevNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   filters: int,
-                   strides: int,
-                   inner_block_fn: Callable[..., tf.keras.layers.Layer],
-                   block_repeats: int,
-                   batch_norm_first: bool,
-                   name: str = 'revblock_group') -> tf.Tensor:
-    """Creates one group of reversible blocks for RevNet model.
-    Args:
-      inputs: A `tf.Tensor` with the input feature map.
-      filters: An `int` number of filters for the first convolution of the
-        layer. Each inner block is built with `filters // 2`.
-      strides: An `int` stride to use for the first convolution of the layer. If
-        greater than 1, this block group will downsample the input.
-      inner_block_fn: Either `nn_blocks.ResidualInner` or
-        `nn_blocks.BottleneckResidualInner`.
-      block_repeats: An `int` number of blocks contained in this block group.
-      batch_norm_first: A `bool` that specifies whether to apply
-        BatchNormalization and activation layer before feeding into convolution
-        layers. Only forwarded to the first block of the group.
-      name: A `str` name for the block.
-    Returns:
-      The output `tf.Tensor` of the block layer.
-    """
-    x = inputs
-    for i in range(block_repeats):
-      is_first_block = i == 0
-      # Only first residual layer in block gets downsampled
-      curr_strides = strides if is_first_block else 1
-      # NOTE(review): activation, sync-bn and norm momentum/epsilon are not
-      # forwarded to the inner blocks, so they use their own defaults; confirm
-      # this is intended.
-      f = inner_block_fn(
-          filters=filters // 2,
-          strides=curr_strides,
-          batch_norm_first=batch_norm_first and is_first_block,
-          kernel_regularizer=self._kernel_regularizer)
-      # `g` never strides; only `f` carries the downsampling of the block.
-      g = inner_block_fn(
-          filters=filters // 2,
-          strides=1,
-          batch_norm_first=batch_norm_first and is_first_block,
-          kernel_regularizer=self._kernel_regularizer)
-      x = nn_blocks.ReversibleLayer(f, g)(x)
-    return tf.identity(x, name=name)
-  def get_config(self) -> Dict[str, Any]:
-    """Returns the stored constructor arguments (without `input_specs`)."""
-    config_dict = {
-        'model_id': self._model_id,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls,
-                  config: Dict[str, Any],
-                  custom_objects: Optional[Any] = None) -> tf.keras.Model:
-    """Builds a model from `config`; `custom_objects` is not used."""
-    return cls(**config)
-  @property
-  def output_specs(self) -> Dict[str, tf.TensorShape]:
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('revnet')
-def build_revnet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
-  """Builds RevNet backbone from a config."""
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'revnet', (f'Inconsistent backbone type '
-                                     f'{backbone_type}')
-  return RevNet(
-      model_id=backbone_cfg.model_id,
-      input_specs=input_specs,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
--- a/official/vision/modeling/backbones/revnet_test.py
+++ b/official/vision/modeling/backbones/revnet_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-"""Tests for RevNet."""
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-from official.vision.modeling.backbones import revnet
-class RevNetTest(parameterized.TestCase, tf.test.TestCase):
-  @parameterized.parameters(
-      (128, 56, 4),
-      (128, 104, 4),
-  )
-  def test_network_creation(self, input_size, model_id,
-                            endpoint_filter_scale):
-    """Test creation of RevNet family models."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    network = revnet.RevNet(model_id=model_id)
-    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-    endpoints = network(inputs)
-    network.summary()
-    self.assertAllEqual(
-        [1, input_size / 2**2, input_size / 2**2, 128 * endpoint_filter_scale],
-        endpoints['2'].shape.as_list())
-    self.assertAllEqual(
-        [1, input_size / 2**3, input_size / 2**3, 256 * endpoint_filter_scale],
-        endpoints['3'].shape.as_list())
-    self.assertAllEqual(
-        [1, input_size / 2**4, input_size / 2**4, 512 * endpoint_filter_scale],
-        endpoints['4'].shape.as_list())
-    self.assertAllEqual(
-        [1, input_size / 2**5, input_size / 2**5, 832 * endpoint_filter_scale],
-        endpoints['5'].shape.as_list())
-  @parameterized.parameters(1, 3, 4)
-  def test_input_specs(self, input_dim):
-    """Test different input feature dimensions."""
-    tf.keras.backend.set_image_data_format('channels_last')
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
-    network = revnet.RevNet(model_id=56, input_specs=input_specs)
-    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
-    _ = network(inputs)
-  def test_serialize_deserialize(self):
-    # Create a network object that sets all of its config options.
-    kwargs = dict(
-        model_id=56,
-        activation='relu',
-        use_sync_bn=False,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_initializer='VarianceScaling',
-        kernel_regularizer=None,
-    )
-    network = revnet.RevNet(**kwargs)
-    expected_config = dict(kwargs)
-    self.assertEqual(network.get_config(), expected_config)
-    # Create another network object from the first object's config.
-    new_network = revnet.RevNet.from_config(network.get_config())
-    # Validate that the config can be forced to JSON.
-    _ = new_network.to_json()
-    # If the serialization was successful, the new config should match the old.
-    self.assertAllEqual(network.get_config(), new_network.get_config())
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/modeling/backbones/spinenet.py
+++ b/official/vision/modeling/backbones/spinenet.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Contains definitions of SpineNet Networks."""
-import math
-from typing import Any, List, Optional, Tuple
-# Import libraries
-from absl import logging
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-from official.vision.ops import spatial_transform_ops
-layers = tf.keras.layers
-# Filter counts keyed by the integers 1-7.
-# NOTE(review): presumably the key is the block level and the value the base
-# number of filters before `filter_size_scale` is applied; confirm against
-# the SpineNet builder.
-FILTER_SIZE_MAP = {
-    1: 32,
-    2: 64,
-    3: 128,
-    4: 256,
-    5: 256,
-    6: 256,
-    7: 256,
-}
-# The fixed SpineNet architecture discovered by NAS.
-# Each element represents a specification of a building block:
-#   (block_level, block_fn, (input_offset0, input_offset1), is_output).
-# The input offsets index into the running list of already-built blocks:
-# positions 0 and 1 are the two stem blocks, and every spec below appends one
-# more entry once its block is built. Blocks flagged `is_output` are recorded
-# as endpoints keyed by their level.
-SPINENET_BLOCK_SPECS = [
-    (2, 'bottleneck', (0, 1), False),
-    (4, 'residual', (0, 1), False),
-    (3, 'bottleneck', (2, 3), False),
-    (4, 'bottleneck', (2, 4), False),
-    (6, 'residual', (3, 5), False),
-    (4, 'bottleneck', (3, 5), False),
-    (5, 'residual', (6, 7), False),
-    (7, 'residual', (6, 8), False),
-    (5, 'bottleneck', (8, 9), False),
-    (5, 'bottleneck', (8, 10), False),
-    (4, 'bottleneck', (5, 10), True),
-    (3, 'bottleneck', (4, 10), True),
-    (5, 'bottleneck', (7, 12), True),
-    (7, 'bottleneck', (5, 14), True),
-    (6, 'bottleneck', (12, 14), True),
-    (2, 'bottleneck', (2, 13), True),
-]
-# Scaling parameters for each SpineNet variant, keyed by model id.
-# `build_spinenet` looks up the entry for `backbone_cfg.model_id` and forwards
-# the four values to the `SpineNet` constructor.
-SCALING_MAP = {
-    '49S': {
-        'endpoints_num_filters': 128,
-        'filter_size_scale': 0.65,
-        'resample_alpha': 0.5,
-        'block_repeats': 1,
-    },
-    '49': {
-        'endpoints_num_filters': 256,
-        'filter_size_scale': 1.0,
-        'resample_alpha': 0.5,
-        'block_repeats': 1,
-    },
-    '96': {
-        'endpoints_num_filters': 256,
-        'filter_size_scale': 1.0,
-        'resample_alpha': 0.5,
-        'block_repeats': 2,
-    },
-    '143': {
-        'endpoints_num_filters': 256,
-        'filter_size_scale': 1.0,
-        'resample_alpha': 1.0,
-        'block_repeats': 3,
-    },
-    # SpineNet-143 with 1.3x filter_size_scale.
-    '143L': {
-        'endpoints_num_filters': 256,
-        'filter_size_scale': 1.3,
-        'resample_alpha': 1.0,
-        'block_repeats': 3,
-    },
-    '190': {
-        'endpoints_num_filters': 512,
-        'filter_size_scale': 1.3,
-        'resample_alpha': 1.0,
-        'block_repeats': 4,
-    },
-}
-class BlockSpec(object):
-  """Plain record describing one building block of a SpineNet.
-  Attributes:
-    level: The block level, as given in the spec tuple.
-    block_fn: Name of the block type, as given in the spec tuple.
-    input_offsets: Pair of indices selecting the two parent blocks.
-    is_output: Whether the block is flagged as an output block.
-  """
-  def __init__(self, level: int, block_fn: str, input_offsets: Tuple[int, int],
-               is_output: bool):
-    """Stores the four fields of the specification unchanged."""
-    (self.level, self.block_fn, self.input_offsets,
-     self.is_output) = (level, block_fn, input_offsets, is_output)
-def build_block_specs(
-    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
-  """Converts raw spec tuples into `BlockSpec` objects.
-  Args:
-    block_specs: Optional list of tuples, each unpacked positionally into
-      `BlockSpec`. When missing or empty, `SPINENET_BLOCK_SPECS` is used.
-  Returns:
-    A list holding one `BlockSpec` per tuple, in the same order.
-  """
-  raw_specs = block_specs or SPINENET_BLOCK_SPECS
-  logging.info('Building SpineNet block specs: %s', raw_specs)
-  built_specs = []
-  for raw_spec in raw_specs:
-    built_specs.append(BlockSpec(*raw_spec))
-  return built_specs
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class SpineNet(tf.keras.Model):
-  """Creates a SpineNet family model.
-  This implements:
-    Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
-    Yin Cui, Quoc V. Le, Xiaodan Song.
-    SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization.
-    (https://arxiv.org/abs/1912.05027)
-  The whole graph is assembled in `__init__` as a functional Keras model: a
-  stem, the scale-permuted blocks described by `block_specs`, and one 1x1 conv
-  endpoint per level in [min_level, max_level]. The model output is a dict
-  keyed by the level converted to a string.
-  """
-  def __init__(
-      self,
-      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
-          shape=[None, None, None, 3]),
-      min_level: int = 3,
-      max_level: int = 7,
-      block_specs: List[BlockSpec] = build_block_specs(),
-      endpoints_num_filters: int = 256,
-      resample_alpha: float = 0.5,
-      block_repeats: int = 1,
-      filter_size_scale: float = 1.0,
-      init_stochastic_depth_rate: float = 0.0,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      **kwargs):
-    """Initializes a SpineNet model.
-    Args:
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      min_level: An `int` of min level for output multiscale features.
-      max_level: An `int` of max level for output multiscale features.
-      block_specs: A list of block specifications for the SpineNet model
-        discovered by NAS.
-      endpoints_num_filters: An `int` of feature dimension for the output
-        endpoints.
-      resample_alpha: A `float` of resampling factor in cross-scale connections.
-      block_repeats: An `int` of number of blocks contained in the layer.
-      filter_size_scale: A `float` of multiplier for the filters (number of
-        channels) for all convolution ops. The value must be greater than zero.
-        Typical usage will be to set this value in (0, 1) to reduce the number
-        of parameters or computation cost of the model.
-      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
-      kernel_initializer: A str for kernel initializer of convolutional layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      activation: A `str` name of the activation function. Only 'relu' and
-        'swish' are accepted.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A small `float` added to variance to avoid dividing by zero.
-      **kwargs: Additional keyword arguments. They are accepted but not used by
-        this constructor.
-    Raises:
-      ValueError: If `activation` is neither 'relu' nor 'swish'.
-    """
-    self._input_specs = input_specs
-    self._min_level = min_level
-    self._max_level = max_level
-    self._block_specs = block_specs
-    self._endpoints_num_filters = endpoints_num_filters
-    self._resample_alpha = resample_alpha
-    self._block_repeats = block_repeats
-    self._filter_size_scale = filter_size_scale
-    self._init_stochastic_depth_rate = init_stochastic_depth_rate
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._activation = activation
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    if activation == 'relu':
-      self._activation_fn = tf.nn.relu
-    elif activation == 'swish':
-      self._activation_fn = tf.nn.swish
-    else:
-      raise ValueError('Activation {} not implemented.'.format(activation))
-    self._init_block_fn = 'bottleneck'
-    self._num_init_blocks = 2
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    # Build SpineNet.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    net = self._build_stem(inputs=inputs)
-    input_width = input_specs.shape[2]
-    if input_width is None:
-      # The width is unknown at build time: use 2**(largest level) as a nominal
-      # width so that every block level maps to a distinct positive width.
-      max_stride = max(spec.level for spec in block_specs)
-      input_width = 2 ** max_stride
-    net = self._build_scale_permuted_network(net=net, input_width=input_width)
-    endpoints = self._build_endpoints(net=net)
-    self._output_specs = {
-        level: feat.get_shape() for level, feat in endpoints.items()
-    }
-    super().__init__(inputs=inputs, outputs=endpoints)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   filters: int,
-                   strides: int,
-                   block_fn_cand: str,
-                   block_repeats: int = 1,
-                   stochastic_depth_drop_rate: Optional[float] = None,
-                   name: str = 'block_group'):
-    """Creates one group of blocks for the SpineNet model.
-    Args:
-      inputs: The input tensor of the group; its last dimension is read as the
-        number of input filters.
-      filters: An `int` number of filters passed to every block.
-      strides: An `int` stride for the first block; later blocks use stride 1.
-      block_fn_cand: Either 'bottleneck' or 'residual'.
-      block_repeats: An `int` number of blocks in the group.
-      stochastic_depth_drop_rate: Drop rate passed to every block.
-      name: Name given to the identity op wrapping the group output.
-    Returns:
-      The output tensor of the last block in the group.
-    """
-    block_fn_candidates = {
-        'bottleneck': nn_blocks.BottleneckBlock,
-        'residual': nn_blocks.ResidualBlock,
-    }
-    block_fn = block_fn_candidates[block_fn_cand]
-    _, _, _, num_filters = inputs.get_shape().as_list()
-    # This code treats a bottleneck block as producing 4x its nominal
-    # `filters`, so that is the channel count the input must already have for
-    # the projection shortcut to be skipped.
-    if block_fn_cand == 'bottleneck':
-      expected_filters = filters * 4
-    else:
-      expected_filters = filters
-    use_projection = not (num_filters == expected_filters and strides == 1)
-    # Arguments shared by the first block and by the repeated ones.
-    shared_kwargs = dict(
-        filters=filters,
-        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer,
-        activation=self._activation,
-        use_sync_bn=self._use_sync_bn,
-        norm_momentum=self._norm_momentum,
-        norm_epsilon=self._norm_epsilon)
-    x = block_fn(
-        strides=strides, use_projection=use_projection, **shared_kwargs)(
-            inputs)
-    for _ in range(1, block_repeats):
-      x = block_fn(strides=1, use_projection=False, **shared_kwargs)(x)
-    return tf.identity(x, name=name)
-  def _build_stem(self, inputs):
-    """Builds SpineNet stem.
-    Args:
-      inputs: The input tensor of the network.
-    Returns:
-      A list with the output of each of the `self._num_init_blocks` initial
-      block groups, in build order.
-    """
-    x = layers.Conv2D(
-        filters=64,
-        kernel_size=7,
-        strides=2,
-        use_bias=False,
-        padding='same',
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)(
-            inputs)
-    x = self._norm(
-        axis=self._bn_axis,
-        momentum=self._norm_momentum,
-        epsilon=self._norm_epsilon)(
-            x)
-    x = tf_utils.get_activation(self._activation_fn)(x)
-    x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
-    net = []
-    # Build the initial level 2 blocks.
-    for i in range(self._num_init_blocks):
-      x = self._block_group(
-          inputs=x,
-          filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
-          strides=1,
-          block_fn_cand=self._init_block_fn,
-          block_repeats=self._block_repeats,
-          name='stem_block_{}'.format(i + 1))
-      net.append(x)
-    return net
-  def _build_scale_permuted_network(self,
-                                    net,
-                                    input_width,
-                                    weighted_fusion=False):
-    """Builds scale-permuted network.
-    Args:
-      net: The list of stem block outputs. It is extended in place with the
-        output of every block built here.
-      input_width: The width of the network input, used to derive the width of
-        every block from its level.
-      weighted_fusion: If True, each parent is scaled by a ReLU-ed variable,
-        normalized over all parents, before the parents are summed.
-    Returns:
-      A dict mapping `str(level)` to the output tensor of every block flagged
-      `is_output`.
-    Raises:
-      ValueError: If two output blocks share the same level.
-    """
-    net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
-    net_block_fns = [self._init_block_fn] * len(net)
-    num_outgoing_connections = [0] * len(net)
-    endpoints = {}
-    for i, block_spec in enumerate(self._block_specs):
-      # Find out specs for the target block.
-      target_width = int(math.ceil(input_width / 2**block_spec.level))
-      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
-                               self._filter_size_scale)
-      target_block_fn = block_spec.block_fn
-      # Resample then merge input0 and input1.
-      parents = []
-      for offset in (block_spec.input_offsets[0], block_spec.input_offsets[1]):
-        parents.append(
-            self._resample_with_alpha(
-                inputs=net[offset],
-                input_width=net_sizes[offset],
-                input_block_fn=net_block_fns[offset],
-                target_width=target_width,
-                target_num_filters=target_num_filters,
-                target_block_fn=target_block_fn,
-                alpha=self._resample_alpha))
-        num_outgoing_connections[offset] += 1
-      x0 = parents[0]
-      # Merge 0 outdegree blocks to the output block.
-      if block_spec.is_output:
-        for j, (j_feat,
-                j_connections) in enumerate(zip(net, num_outgoing_connections)):
-          if j_connections == 0 and (j_feat.shape[2] == target_width and
-                                     j_feat.shape[3] == x0.shape[3]):
-            parents.append(j_feat)
-            num_outgoing_connections[j] += 1
-      # pylint: disable=g-direct-tensorflow-import
-      if weighted_fusion:
-        dtype = parents[0].dtype
-        parent_weights = [
-            tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
-                i, j)), dtype=dtype)) for j in range(len(parents))]
-        weights_sum = tf.add_n(parent_weights)
-        # Pair parents with their weights directly instead of re-using the
-        # outer loop variable name as an index.
-        parents = [
-            parent * weight / (weights_sum + 0.0001)
-            for parent, weight in zip(parents, parent_weights)
-        ]
-      # Fuse all parent nodes then build a new block.
-      x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
-      x = self._block_group(
-          inputs=x,
-          filters=target_num_filters,
-          strides=1,
-          block_fn_cand=target_block_fn,
-          block_repeats=self._block_repeats,
-          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
-              self._init_stochastic_depth_rate, i + 1, len(self._block_specs)),
-          name='scale_permuted_block_{}'.format(i + 1))
-      net.append(x)
-      net_sizes.append(target_width)
-      net_block_fns.append(target_block_fn)
-      num_outgoing_connections.append(0)
-      # Save output feats.
-      if block_spec.is_output:
-        # Endpoints are keyed by the level as a string, so the duplicate check
-        # must use the same string key; testing the int level never matches.
-        level_key = str(block_spec.level)
-        if level_key in endpoints:
-          raise ValueError('Duplicate feats found for output level {}.'.format(
-              block_spec.level))
-        if (block_spec.level < self._min_level or
-            block_spec.level > self._max_level):
-          logging.warning(
-              'SpineNet output level out of range [min_level, max_level] = '
-              '[%s, %s] will not be used for further processing.',
-              self._min_level, self._max_level)
-        endpoints[level_key] = x
-    return endpoints
-  def _build_endpoints(self, net):
-    """Matches filter size for endpoints before sharing conv layers.
-    Args:
-      net: A dict mapping `str(level)` to a feature tensor. It must contain an
-        entry for every level in [min_level, max_level].
-    Returns:
-      A dict mapping `str(level)` to the feature after a 1x1 conv with
-      `endpoints_num_filters` filters, normalization and activation.
-    """
-    endpoints = {}
-    for level in range(self._min_level, self._max_level + 1):
-      x = layers.Conv2D(
-          filters=self._endpoints_num_filters,
-          kernel_size=1,
-          strides=1,
-          use_bias=False,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              net[str(level)])
-      x = self._norm(
-          axis=self._bn_axis,
-          momentum=self._norm_momentum,
-          epsilon=self._norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(self._activation_fn)(x)
-      endpoints[str(level)] = x
-    return endpoints
-  def _resample_with_alpha(self,
-                           inputs,
-                           input_width,
-                           input_block_fn,
-                           target_width,
-                           target_num_filters,
-                           target_block_fn,
-                           alpha=0.5):
-    """Matches resolution and feature dimension.
-    Args:
-      inputs: The parent feature tensor to resample.
-      input_width: The width recorded for the parent block.
-      input_block_fn: Block type name of the parent block.
-      target_width: The width of the block being built.
-      target_num_filters: Nominal number of filters of the block being built.
-      target_block_fn: Block type name of the block being built.
-      alpha: Factor applied to the parent's nominal filter count to size the
-        intermediate convolutions.
-    Returns:
-      The resampled tensor, ending with a 1x1 conv and normalization (no
-      activation).
-    """
-    _, _, _, input_num_filters = inputs.get_shape().as_list()
-    # Recover the nominal filter count of a bottleneck parent.
-    if input_block_fn == 'bottleneck':
-      input_num_filters /= 4
-    new_num_filters = int(input_num_filters * alpha)
-    x = layers.Conv2D(
-        filters=new_num_filters,
-        kernel_size=1,
-        strides=1,
-        use_bias=False,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)(
-            inputs)
-    x = self._norm(
-        axis=self._bn_axis,
-        momentum=self._norm_momentum,
-        epsilon=self._norm_epsilon)(
-            x)
-    x = tf_utils.get_activation(self._activation_fn)(x)
-    # Spatial resampling.
-    if input_width > target_width:
-      # The first halving uses a strided 3x3 conv; any further halvings use
-      # max pooling.
-      x = layers.Conv2D(
-          filters=new_num_filters,
-          kernel_size=3,
-          strides=2,
-          padding='SAME',
-          use_bias=False,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              x)
-      x = self._norm(
-          axis=self._bn_axis,
-          momentum=self._norm_momentum,
-          epsilon=self._norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(self._activation_fn)(x)
-      input_width /= 2
-      while input_width > target_width:
-        x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
-        input_width /= 2
-    elif input_width < target_width:
-      scale = target_width // input_width
-      x = spatial_transform_ops.nearest_upsampling(x, scale=scale)
-    # Last 1x1 conv to match filter size.
-    if target_block_fn == 'bottleneck':
-      target_num_filters *= 4
-    x = layers.Conv2D(
-        filters=target_num_filters,
-        kernel_size=1,
-        strides=1,
-        use_bias=False,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)(
-            x)
-    x = self._norm(
-        axis=self._bn_axis,
-        momentum=self._norm_momentum,
-        epsilon=self._norm_epsilon)(
-            x)
-    return x
-  def get_config(self):
-    """Returns the constructor arguments needed to rebuild the model.
-    `input_specs` and `block_specs` are not part of the returned dict, so a
-    model rebuilt through `from_config` uses the constructor defaults for them.
-    """
-    config_dict = {
-        'min_level': self._min_level,
-        'max_level': self._max_level,
-        'endpoints_num_filters': self._endpoints_num_filters,
-        'resample_alpha': self._resample_alpha,
-        'block_repeats': self._block_repeats,
-        'filter_size_scale': self._filter_size_scale,
-        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    """Builds a model by passing `config` as keyword arguments.
-    `custom_objects` is accepted but not used.
-    """
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-@factory.register_backbone_builder('spinenet')
-def build_spinenet(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
-  """Builds SpineNet backbone from a config.
-  Args:
-    input_specs: The input spec forwarded to `SpineNet`.
-    backbone_config: Config whose `type` must be 'spinenet' and whose `get()`
-      result provides `model_id`, `min_level`, `max_level` and
-      `stochastic_depth_drop_rate`.
-    norm_activation_config: Config providing `activation`, `use_sync_bn`,
-      `norm_momentum` and `norm_epsilon`.
-    l2_regularizer: Regularizer used as the kernel regularizer.
-  Returns:
-    The `SpineNet` model for the requested variant.
-  Raises:
-    ValueError: If `model_id` has no entry in `SCALING_MAP`.
-  """
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'spinenet', (
-      f'Inconsistent backbone type {backbone_type}')
-  model_id = backbone_cfg.model_id
-  variant = SCALING_MAP.get(model_id)
-  if variant is None:
-    raise ValueError(f'SpineNet-{model_id} is not a valid architecture.')
-  # Everything that describes the variant or the normalization/activation
-  # setup is gathered first, then passed on in a single call.
-  model_kwargs = dict(
-      input_specs=input_specs,
-      min_level=backbone_cfg.min_level,
-      max_level=backbone_cfg.max_level,
-      endpoints_num_filters=variant['endpoints_num_filters'],
-      resample_alpha=variant['resample_alpha'],
-      block_repeats=variant['block_repeats'],
-      filter_size_scale=variant['filter_size_scale'],
-      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
-      kernel_regularizer=l2_regularizer,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon)
-  return SpineNet(**model_kwargs)
--- a/official/vision/modeling/backbones/spinenet_mobile.py
+++ b/official/vision/modeling/backbones/spinenet_mobile.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Contains definitions of Mobile SpineNet Networks."""
-import math
-from typing import Any, List, Optional, Tuple
-# Import libraries
-from absl import logging
-import tensorflow as tf
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.modeling.backbones import factory
-from official.vision.modeling.layers import nn_blocks
-from official.vision.modeling.layers import nn_layers
-from official.vision.ops import spatial_transform_ops
-layers = tf.keras.layers
-# Base number of filters for each level. The stem and the block builders look
-# a level up here and multiply the result by `filter_size_scale` before
-# truncating it to an int; entry 0 sizes the stem convolution.
-FILTER_SIZE_MAP = {
-    0: 8,
-    1: 16,
-    2: 24,
-    3: 40,
-    4: 80,
-    5: 112,
-    6: 112,
-    7: 112,
-}
-# The fixed SpineNet architecture discovered by NAS.
-# Each element represents a specification of a building block:
-#   (block_level, block_fn, (input_offset0, input_offset1), is_output).
-# The input offsets index into the list of already-built blocks, which starts
-# with the two stem blocks at positions 0 and 1.
-# NOTE(review): presumably every spec appends its block to that list, as the
-# offsets above 1 suggest; the code doing so is outside this view, so confirm.
-SPINENET_BLOCK_SPECS = [
-    (2, 'mbconv', (0, 1), False),
-    (2, 'mbconv', (1, 2), False),
-    (4, 'mbconv', (1, 2), False),
-    (3, 'mbconv', (3, 4), False),
-    (4, 'mbconv', (3, 5), False),
-    (6, 'mbconv', (4, 6), False),
-    (4, 'mbconv', (4, 6), False),
-    (5, 'mbconv', (7, 8), False),
-    (7, 'mbconv', (7, 9), False),
-    (5, 'mbconv', (9, 10), False),
-    (5, 'mbconv', (9, 11), False),
-    (4, 'mbconv', (6, 11), True),
-    (3, 'mbconv', (5, 11), True),
-    (5, 'mbconv', (8, 13), True),
-    (7, 'mbconv', (6, 15), True),
-    (6, 'mbconv', (13, 15), True),
-]
-# Scaling parameters for each Mobile SpineNet variant, keyed by model id.
-# NOTE(review): the code that reads this map is not visible here; presumably
-# the backbone builder forwards these values to `SpineNetMobile` -- confirm.
-SCALING_MAP = {
-    '49': {
-        'endpoints_num_filters': 48,
-        'filter_size_scale': 1.0,
-        'block_repeats': 1,
-    },
-    '49S': {
-        'endpoints_num_filters': 40,
-        'filter_size_scale': 0.65,
-        'block_repeats': 1,
-    },
-    '49XS': {
-        'endpoints_num_filters': 24,
-        'filter_size_scale': 0.6,
-        'block_repeats': 1,
-    },
-}
-class BlockSpec(object):
-  """Plain record describing one building block of a Mobile SpineNet.
-  Attributes:
-    level: The block level, as given in the spec tuple.
-    block_fn: Name of the block type, as given in the spec tuple.
-    input_offsets: Pair of indices selecting the two parent blocks.
-    is_output: Whether the block is flagged as an output block.
-  """
-  def __init__(self, level: int, block_fn: str, input_offsets: Tuple[int, int],
-               is_output: bool):
-    """Stores the four fields of the specification unchanged."""
-    (self.level, self.block_fn, self.input_offsets,
-     self.is_output) = (level, block_fn, input_offsets, is_output)
-def build_block_specs(
-    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
-  """Converts raw spec tuples into `BlockSpec` objects.
-  Args:
-    block_specs: Optional list of tuples, each unpacked positionally into
-      `BlockSpec`. When missing or empty, `SPINENET_BLOCK_SPECS` is used.
-  Returns:
-    A list holding one `BlockSpec` per tuple, in the same order.
-  """
-  raw_specs = block_specs or SPINENET_BLOCK_SPECS
-  logging.info('Building SpineNet block specs: %s', raw_specs)
-  built_specs = []
-  for raw_spec in raw_specs:
-    built_specs.append(BlockSpec(*raw_spec))
-  return built_specs
-@tf.keras.utils.register_keras_serializable(package='Vision')
-class SpineNetMobile(tf.keras.Model):
-  """Creates a Mobile SpineNet family model.
-  This implements:
-    [1] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
-    Yin Cui, Quoc V. Le, Xiaodan Song.
-    SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization.
-    (https://arxiv.org/abs/1912.05027).
-    [2] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Yin Cui, Mingxing Tan,
-    Quoc Le, Xiaodan Song.
-    Efficient Scale-Permuted Backbone with Learned Resource Distribution.
-    (https://arxiv.org/abs/2010.11426).
-  """
-  def __init__(
-      self,
-      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
-          shape=[None, None, None, 3]),
-      min_level: int = 3,
-      max_level: int = 7,
-      block_specs: List[BlockSpec] = build_block_specs(),
-      endpoints_num_filters: int = 256,
-      se_ratio: float = 0.2,
-      block_repeats: int = 1,
-      filter_size_scale: float = 1.0,
-      expand_ratio: int = 6,
-      init_stochastic_depth_rate: float = 0.0,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      use_keras_upsampling_2d: bool = False,
-      **kwargs):
-    """Initializes a Mobile SpineNet model.
-    Args:
-      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
-      min_level: An `int` of min level for output multiscale features.
-      max_level: An `int` of max level for output multiscale features.
-      block_specs: The block specifications for the SpineNet model discovered by
-        NAS. The specs are copied, so the passed objects are never modified.
-      endpoints_num_filters: An `int` of feature dimension for the output
-        endpoints.
-      se_ratio: A `float` of Squeeze-and-Excitation ratio.
-      block_repeats: An `int` of number of blocks contained in the layer.
-      filter_size_scale: A `float` of multiplier for the filters (number of
-        channels) for all convolution ops. The value must be greater than zero.
-        Typical usage will be to set this value in (0, 1) to reduce the number
-        of parameters or computation cost of the model.
-      expand_ratio: An `integer` of expansion ratios for inverted bottleneck
-        blocks.
-      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
-      kernel_initializer: A str for kernel initializer of convolutional layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default to None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-        Default to None.
-      activation: A `str` name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A small `float` added to variance to avoid dividing by zero.
-      use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
-      **kwargs: Additional keyword arguments. They are accepted but not used by
-        this constructor.
-    """
-    self._input_specs = input_specs
-    self._min_level = min_level
-    self._max_level = max_level
-    # `_build_scale_permuted_network` overwrites `level` on the specs it
-    # iterates over. Keep private copies so that neither the caller's objects
-    # nor the default list (created once, when this function is defined) is
-    # modified; otherwise one model's `max_level` clamp would leak into every
-    # model built afterwards.
-    self._block_specs = [
-        BlockSpec(spec.level, spec.block_fn, spec.input_offsets, spec.is_output)
-        for spec in block_specs
-    ]
-    self._endpoints_num_filters = endpoints_num_filters
-    self._se_ratio = se_ratio
-    self._block_repeats = block_repeats
-    self._filter_size_scale = filter_size_scale
-    self._expand_ratio = expand_ratio
-    self._init_stochastic_depth_rate = init_stochastic_depth_rate
-    self._kernel_initializer = kernel_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._activation = activation
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    self._use_keras_upsampling_2d = use_keras_upsampling_2d
-    self._num_init_blocks = 2
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    # Build SpineNet.
-    inputs = tf.keras.Input(shape=input_specs.shape[1:])
-    net = self._build_stem(inputs=inputs)
-    input_width = input_specs.shape[2]
-    if input_width is None:
-      # The width is unknown at build time: use 2**(largest level) as a nominal
-      # width. The levels are read from the specs as passed in, before any
-      # clamping to `max_level`.
-      max_stride = max(spec.level for spec in block_specs)
-      input_width = 2 ** max_stride
-    net = self._build_scale_permuted_network(net=net, input_width=input_width)
-    endpoints = self._build_endpoints(net=net)
-    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
-    super().__init__(inputs=inputs, outputs=endpoints)
-  def _block_group(self,
-                   inputs: tf.Tensor,
-                   in_filters: int,
-                   out_filters: int,
-                   strides: int,
-                   expand_ratio: int = 6,
-                   block_repeats: int = 1,
-                   se_ratio: float = 0.2,
-                   stochastic_depth_drop_rate: Optional[float] = None,
-                   name: str = 'block_group'):
-    """Creates one group of blocks for the SpineNet model.
-    The group is a chain of `block_repeats` inverted bottleneck blocks. The
-    first block maps `in_filters` to `out_filters` with the given `strides`;
-    each following block is applied to the previous block's output with
-    stride 1 and `out_filters` on both sides.
-    Args:
-      inputs: The input tensor of the group.
-      in_filters: An `int` number of filters of `inputs`.
-      out_filters: An `int` number of filters produced by every block.
-      strides: An `int` stride of the first block.
-      expand_ratio: An `int` expansion ratio passed to every block.
-      block_repeats: An `int` number of blocks in the group.
-      se_ratio: A `float` Squeeze-and-Excitation ratio passed to every block.
-      stochastic_depth_drop_rate: Drop rate passed to every block.
-      name: Name of the linear activation layer wrapping the group output.
-    Returns:
-      The output tensor of the last block in the group.
-    """
-    # Arguments shared by the first block and by the repeated ones.
-    shared_kwargs = dict(
-        out_filters=out_filters,
-        se_ratio=se_ratio,
-        expand_ratio=expand_ratio,
-        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer,
-        activation=self._activation,
-        use_sync_bn=self._use_sync_bn,
-        norm_momentum=self._norm_momentum,
-        norm_epsilon=self._norm_epsilon)
-    x = nn_blocks.InvertedBottleneckBlock(
-        in_filters=in_filters, strides=strides, **shared_kwargs)(
-            inputs)
-    for _ in range(1, block_repeats):
-      # Chain on `x`, not on `inputs`: feeding `inputs` again would throw away
-      # every block built so far. The input of a repeated block already has
-      # `out_filters` channels.
-      x = nn_blocks.InvertedBottleneckBlock(
-          in_filters=out_filters, strides=1, **shared_kwargs)(
-              x)
-    return tf.keras.layers.Activation('linear', name=name)(x)
-  def _build_stem(self, inputs):
-    """Builds the stem: a strided 3x3 conv followed by the initial blocks.
-    Args:
-      inputs: The input tensor of the network.
-    Returns:
-      A list with the output of each of the `self._num_init_blocks` stem block
-      groups, in build order.
-    """
-    scale = self._filter_size_scale
-    stem_conv = layers.Conv2D(
-        filters=int(FILTER_SIZE_MAP[0] * scale),
-        kernel_size=3,
-        strides=2,
-        use_bias=False,
-        padding='same',
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)
-    features = stem_conv(inputs)
-    stem_norm = self._norm(
-        axis=self._bn_axis,
-        momentum=self._norm_momentum,
-        epsilon=self._norm_epsilon)
-    features = stem_norm(features)
-    stem_activation = tf_utils.get_activation(
-        self._activation, use_keras_layer=True)
-    features = stem_activation(features)
-    # The first stem block keeps the resolution (stride 1) and the second one
-    # halves it (stride 2); block `idx` maps the filters of level `idx` to
-    # those of level `idx + 1`.
-    stem_strides = [1, 2]
-    stem_outputs = []
-    for idx in range(self._num_init_blocks):
-      block_in = int(FILTER_SIZE_MAP[idx] * scale)
-      block_out = int(FILTER_SIZE_MAP[idx + 1] * scale)
-      features = self._block_group(
-          inputs=features,
-          in_filters=block_in,
-          out_filters=block_out,
-          expand_ratio=self._expand_ratio,
-          strides=stem_strides[idx],
-          se_ratio=self._se_ratio,
-          block_repeats=self._block_repeats,
-          name='stem_block_{}'.format(idx + 1))
-      stem_outputs.append(features)
-    return stem_outputs
-  def _build_scale_permuted_network(self,
-                                    net,
-                                    input_width,
-                                    weighted_fusion=False):
-    """Builds scale-permuted network.
-
-    For each entry of `self._block_specs`, two earlier features (selected by
-    `block_spec.input_offsets`) are resampled to the block's target width and
-    filter count, added together (optionally with learned weights), activated
-    and fed to a new block group whose output is appended to `net`.
-
-    Args:
-      net: List of features built so far. It is extended in place with one new
-        feature per block spec. Presumably the two stem outputs on entry, since
-        `net_sizes` starts with exactly two widths (TODO confirm with caller).
-      input_width: Width of the network input; the width at a given level is
-        derived as `ceil(input_width / 2**level)`.
-      weighted_fusion: If True, every parent is scaled by a ReLU-ed
-        `tf.Variable` (initialized to 1.0) divided by the sum of those weights
-        plus 0.0001 before the parents are added.
-
-    Returns:
-      A dict mapping `str(level)` to the output feature of every block spec
-      whose `is_output` flag is set.
-
-    Raises:
-      ValueError: Intended for duplicate output levels; see the NOTE(review)
-        next to the check near the end of the loop body.
-    """
-    # Widths matching the first two entries of `net`.
-    net_sizes = [
-        int(math.ceil(input_width / 2)),
-        int(math.ceil(input_width / 2**2))
-    ]
-    # Per-feature count of how many later blocks consumed it as a parent.
-    num_outgoing_connections = [0] * len(net)
-    endpoints = {}
-    for i, block_spec in enumerate(self._block_specs):
-      # Update block level if it is larger than max_level to avoid building
-      # blocks smaller than requested.
-      # This assigns to the spec object itself, so the clamped level persists
-      # on the entries of `self._block_specs`.
-      block_spec.level = min(block_spec.level, self._max_level)
-      # Find out specs for the target block.
-      target_width = int(math.ceil(input_width / 2**block_spec.level))
-      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
-                               self._filter_size_scale)
-      # Resample then merge input0 and input1.
-      parents = []
-      input0 = block_spec.input_offsets[0]
-      input1 = block_spec.input_offsets[1]
-      x0 = self._resample_with_sepconv(
-          inputs=net[input0],
-          input_width=net_sizes[input0],
-          target_width=target_width,
-          target_num_filters=target_num_filters)
-      parents.append(x0)
-      num_outgoing_connections[input0] += 1
-      x1 = self._resample_with_sepconv(
-          inputs=net[input1],
-          input_width=net_sizes[input1],
-          target_width=target_width,
-          target_num_filters=target_num_filters)
-      parents.append(x1)
-      num_outgoing_connections[input1] += 1
-      # Merge 0 outdegree blocks to the output block.
-      if block_spec.is_output:
-        for j, (j_feat,
-                j_connections) in enumerate(zip(net, num_outgoing_connections)):
-          # Only unused features whose dims 2 and 3 match the target are merged
-          # (presumably width and channels in NHWC layout - TODO confirm).
-          if j_connections == 0 and (j_feat.shape[2] == target_width and
-                                     j_feat.shape[3] == x0.shape[3]):
-            parents.append(j_feat)
-            num_outgoing_connections[j] += 1
-      # pylint: disable=g-direct-tensorflow-import
-      if weighted_fusion:
-        dtype = parents[0].dtype
-        # One scalar variable per parent, kept non-negative through ReLU.
-        parent_weights = [
-            tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
-                i, j)), dtype=dtype)) for j in range(len(parents))]
-        weights_sum = layers.Add()(parent_weights)
-        # The `i` below is local to the comprehension (Python 3 scoping) and
-        # does not disturb the outer loop index. 0.0001 guards the division.
-        parents = [
-            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
-            for i in range(len(parents))
-        ]
-      # Fuse all parent nodes then build a new block.
-      x = tf_utils.get_activation(
-          self._activation, use_keras_layer=True)(layers.Add()(parents))
-      x = self._block_group(
-          inputs=x,
-          in_filters=target_num_filters,
-          out_filters=target_num_filters,
-          strides=1,
-          se_ratio=self._se_ratio,
-          expand_ratio=self._expand_ratio,
-          block_repeats=self._block_repeats,
-          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
-              self._init_stochastic_depth_rate, i + 1, len(self._block_specs)),
-          name='scale_permuted_block_{}'.format(i + 1))
-      # Keep the three bookkeeping lists aligned: feature, width, out-degree.
-      net.append(x)
-      net_sizes.append(target_width)
-      num_outgoing_connections.append(0)
-      # Save output feats.
-      if block_spec.is_output:
-        # NOTE(review): `endpoints` is keyed by `str(block_spec.level)` (see the
-        # assignment below) while this test uses the raw `block_spec.level`, so
-        # as written it does not match stored keys and a later output for the
-        # same level replaces the earlier one. Confirm the intent before
-        # changing it: enabling the check could make configurations that build
-        # today raise instead.
-        if block_spec.level in endpoints:
-          raise ValueError('Duplicate feats found for output level {}.'.format(
-              block_spec.level))
-        # NOTE(review): after the clamp at the top of the loop the level cannot
-        # exceed `self._max_level`, so only the lower bound can trigger here.
-        if (block_spec.level < self._min_level or
-            block_spec.level > self._max_level):
-          logging.warning(
-              'SpineNet output level out of range [min_level, max_levle] = [%s, %s] will not be used for further processing.',
-              self._min_level, self._max_level)
-        endpoints[str(block_spec.level)] = x
-    return endpoints
-  def _build_endpoints(self, net):
-    """Matches filter size for endpoints before sharing conv layers."""
-    endpoints = {}
-    for level in range(self._min_level, self._max_level + 1):
-      x = layers.Conv2D(
-          filters=self._endpoints_num_filters,
-          kernel_size=1,
-          strides=1,
-          use_bias=False,
-          kernel_initializer=self._kernel_initializer,
-          kernel_regularizer=self._kernel_regularizer,
-          bias_regularizer=self._bias_regularizer)(
-              net[str(level)])
-      x = self._norm(
-          axis=self._bn_axis,
-          momentum=self._norm_momentum,
-          epsilon=self._norm_epsilon)(
-              x)
-      x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
-      endpoints[str(level)] = x
-    return endpoints
-  def _resample_with_sepconv(self, inputs, input_width, target_width,
-                             target_num_filters):
-    """Matches resolution and feature dimension."""
-    x = inputs
-    # Spatial resampling.
-    if input_width > target_width:
-      while input_width > target_width:
-        x = layers.DepthwiseConv2D(
-            kernel_size=3,
-            strides=2,
-            padding='SAME',
-            use_bias=False,
-            kernel_initializer=self._kernel_initializer,
-            kernel_regularizer=self._kernel_regularizer,
-            bias_regularizer=self._bias_regularizer)(
-                x)
-        x = self._norm(
-            axis=self._bn_axis,
-            momentum=self._norm_momentum,
-            epsilon=self._norm_epsilon)(
-                x)
-        x = tf_utils.get_activation(
-            self._activation, use_keras_layer=True)(x)
-        input_width /= 2
-    elif input_width < target_width:
-      scale = target_width // input_width
-      x = spatial_transform_ops.nearest_upsampling(
-          x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)
-    # Last 1x1 conv to match filter size.
-    x = layers.Conv2D(
-        filters=target_num_filters,
-        kernel_size=1,
-        strides=1,
-        use_bias=False,
-        kernel_initializer=self._kernel_initializer,
-        kernel_regularizer=self._kernel_regularizer,
-        bias_regularizer=self._bias_regularizer)(
-            x)
-    x = self._norm(
-        axis=self._bn_axis,
-        momentum=self._norm_momentum,
-        epsilon=self._norm_epsilon)(
-            x)
-    return x
-  def get_config(self):
-    config_dict = {
-        'min_level': self._min_level,
-        'max_level': self._max_level,
-        'endpoints_num_filters': self._endpoints_num_filters,
-        'se_ratio': self._se_ratio,
-        'expand_ratio': self._expand_ratio,
-        'block_repeats': self._block_repeats,
-        'filter_size_scale': self._filter_size_scale,
-        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
-        'kernel_initializer': self._kernel_initializer,
-        'kernel_regularizer': self._kernel_regularizer,
-        'bias_regularizer': self._bias_regularizer,
-        'activation': self._activation,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
-    }
-    return config_dict
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    return cls(**config)
-  @property
-  def output_specs(self):
-    """A dict of {level: TensorShape} pairs for the model output."""
-    # Returns `self._output_specs` as-is; the attribute is assigned outside this
-    # method (presumably during construction - TODO confirm).
-    return self._output_specs
-@factory.register_backbone_builder('spinenet_mobile')
-def build_spinenet_mobile(
-    input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
-    norm_activation_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
-  """Builds Mobile SpineNet backbone from a config."""
-  backbone_type = backbone_config.type
-  backbone_cfg = backbone_config.get()
-  assert backbone_type == 'spinenet_mobile', (f'Inconsistent backbone type '
-                                              f'{backbone_type}')
-  model_id = backbone_cfg.model_id
-  if model_id not in SCALING_MAP:
-    raise ValueError(
-        'Mobile SpineNet-{} is not a valid architecture.'.format(model_id))
-  scaling_params = SCALING_MAP[model_id]
-  return SpineNetMobile(
-      input_specs=input_specs,
-      min_level=backbone_cfg.min_level,
-      max_level=backbone_cfg.max_level,
-      endpoints_num_filters=scaling_params['endpoints_num_filters'],
-      block_repeats=scaling_params['block_repeats'],
-      filter_size_scale=scaling_params['filter_size_scale'],
-      se_ratio=backbone_cfg.se_ratio,
-      expand_ratio=backbone_cfg.expand_ratio,
-      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
-      kernel_regularizer=l2_regularizer,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)