ModelZoo / ResNet50_tensorflow / Commits

Commit 0225b135 (unverified)
Authored Mar 05, 2022 by Srihari Humbarwadi; committed by GitHub on Mar 05, 2022.
Parents: 7479dbb8, 4c571a3c

    Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

Changes: 332 in the full commit; this page shows 20 changed files with 4049 additions and 0 deletions (+4049, -0).
official/vision/modeling/backbones/resnet_deeplab.py          +366   -0
official/vision/modeling/backbones/resnet_deeplab_test.py     +144   -0
official/vision/modeling/backbones/resnet_test.py             +156   -0
official/vision/modeling/backbones/revnet.py                  +233   -0
official/vision/modeling/backbones/revnet_test.py             +92    -0
official/vision/modeling/backbones/spinenet.py                +572   -0
official/vision/modeling/backbones/spinenet_mobile.py         +539   -0
official/vision/modeling/backbones/spinenet_mobile_test.py    +112   -0
official/vision/modeling/backbones/spinenet_test.py           +128   -0
official/vision/modeling/classification_model.py              +122   -0
official/vision/modeling/classification_model_test.py         +184   -0
official/vision/modeling/decoders/__init__.py                 +20    -0
official/vision/modeling/decoders/aspp.py                     +203   -0
official/vision/modeling/decoders/aspp_test.py                +94    -0
official/vision/modeling/decoders/factory.py                  +135   -0
official/vision/modeling/decoders/factory_test.py             +159   -0
official/vision/modeling/decoders/fpn.py                      +246   -0
official/vision/modeling/decoders/fpn_test.py                 +117   -0
official/vision/modeling/decoders/nasfpn.py                   +368   -0
official/vision/modeling/decoders/nasfpn_test.py              +59    -0
official/vision/modeling/backbones/resnet_deeplab.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of Residual Networks with Deeplab modifications."""
from
typing
import
Callable
,
Optional
,
Tuple
,
List
import
numpy
as
np
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.backbones
import
factory
from
official.vision.modeling.layers
import
nn_blocks
from
official.vision.modeling.layers
import
nn_layers
layers
=
tf
.
keras
.
layers
# Specifications for different ResNet variants.
# Each entry specifies block configurations of the particular ResNet variant.
# Each element in the block configuration is in the following format:
# (block_fn, num_filters, block_repeats)
RESNET_SPECS
=
{
50
:
[
(
'bottleneck'
,
64
,
3
),
(
'bottleneck'
,
128
,
4
),
(
'bottleneck'
,
256
,
6
),
(
'bottleneck'
,
512
,
3
),
],
101
:
[
(
'bottleneck'
,
64
,
3
),
(
'bottleneck'
,
128
,
4
),
(
'bottleneck'
,
256
,
23
),
(
'bottleneck'
,
512
,
3
),
],
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
DilatedResNet
(
tf
.
keras
.
Model
):
"""Creates a ResNet model with Deeplabv3 modifications.
This backbone is suitable for semantic segmentation. This implements
Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam.
Rethinking Atrous Convolution for Semantic Image Segmentation.
(https://arxiv.org/pdf/1706.05587)
"""
def
__init__
(
self
,
model_id
:
int
,
output_stride
:
int
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
=
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
stem_type
:
str
=
'v0'
,
se_ratio
:
Optional
[
float
]
=
None
,
init_stochastic_depth_rate
:
float
=
0.0
,
multigrid
:
Optional
[
Tuple
[
int
]]
=
None
,
last_stage_repeats
:
int
=
1
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a ResNet model with DeepLab modification.
Args:
model_id: An `int` specifies depth of ResNet backbone model.
output_stride: An `int` of output stride, ratio of input to output
resolution.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
stem_type: A `str` of stem type. Can be `v0` or `v1`. `v1` replaces 7x7
conv by 3 3x3 convs.
se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
multigrid: A tuple of the same length as the number of blocks in the last
resnet stage.
last_stage_repeats: An `int` that specifies how many times last stage is
repeated.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_output_stride
=
output_stride
self
.
_input_specs
=
input_specs
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
if
use_sync_bn
:
self
.
_norm
=
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
layers
.
BatchNormalization
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_stem_type
=
stem_type
self
.
_se_ratio
=
se_ratio
self
.
_init_stochastic_depth_rate
=
init_stochastic_depth_rate
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
bn_axis
=
-
1
else
:
bn_axis
=
1
# Build ResNet.
inputs
=
tf
.
keras
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
if
stem_type
==
'v0'
:
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
7
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
elif
stem_type
==
'v1'
:
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
3
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
3
,
strides
=
1
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
x
=
layers
.
Conv2D
(
filters
=
128
,
kernel_size
=
3
,
strides
=
1
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
else
:
raise
ValueError
(
'Stem type {} not supported.'
.
format
(
stem_type
))
x
=
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'same'
)(
x
)
normal_resnet_stage
=
int
(
np
.
math
.
log2
(
self
.
_output_stride
))
-
2
endpoints
=
{}
for
i
in
range
(
normal_resnet_stage
+
1
):
spec
=
RESNET_SPECS
[
model_id
][
i
]
if
spec
[
0
]
==
'bottleneck'
:
block_fn
=
nn_blocks
.
BottleneckBlock
else
:
raise
ValueError
(
'Block fn `{}` is not supported.'
.
format
(
spec
[
0
]))
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
spec
[
1
],
strides
=
(
1
if
i
==
0
else
2
),
dilation_rate
=
1
,
block_fn
=
block_fn
,
block_repeats
=
spec
[
2
],
stochastic_depth_drop_rate
=
nn_layers
.
get_stochastic_depth_rate
(
self
.
_init_stochastic_depth_rate
,
i
+
2
,
4
+
last_stage_repeats
),
name
=
'block_group_l{}'
.
format
(
i
+
2
))
endpoints
[
str
(
i
+
2
)]
=
x
dilation_rate
=
2
for
i
in
range
(
normal_resnet_stage
+
1
,
3
+
last_stage_repeats
):
spec
=
RESNET_SPECS
[
model_id
][
i
]
if
i
<
3
else
RESNET_SPECS
[
model_id
][
-
1
]
if
spec
[
0
]
==
'bottleneck'
:
block_fn
=
nn_blocks
.
BottleneckBlock
else
:
raise
ValueError
(
'Block fn `{}` is not supported.'
.
format
(
spec
[
0
]))
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
spec
[
1
],
strides
=
1
,
dilation_rate
=
dilation_rate
,
block_fn
=
block_fn
,
block_repeats
=
spec
[
2
],
stochastic_depth_drop_rate
=
nn_layers
.
get_stochastic_depth_rate
(
self
.
_init_stochastic_depth_rate
,
i
+
2
,
4
+
last_stage_repeats
),
multigrid
=
multigrid
if
i
>=
3
else
None
,
name
=
'block_group_l{}'
.
format
(
i
+
2
))
dilation_rate
*=
2
endpoints
[
str
(
normal_resnet_stage
+
2
)]
=
x
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
}
super
(
DilatedResNet
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
endpoints
,
**
kwargs
)
def
_block_group
(
self
,
inputs
:
tf
.
Tensor
,
filters
:
int
,
strides
:
int
,
dilation_rate
:
int
,
block_fn
:
Callable
[...,
tf
.
keras
.
layers
.
Layer
],
block_repeats
:
int
=
1
,
stochastic_depth_drop_rate
:
float
=
0.0
,
multigrid
:
Optional
[
List
[
int
]]
=
None
,
name
:
str
=
'block_group'
):
"""Creates one group of blocks for the ResNet model.
Deeplab applies strides at the last block.
Args:
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` off number of filters for the first convolution of the
layer.
strides: An `int` of stride to use for the first convolution of the layer.
If greater than 1, this layer will downsample the input.
dilation_rate: An `int` of diluted convolution rates.
block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
block_repeats: An `int` of number of blocks contained in the layer.
stochastic_depth_drop_rate: A `float` of drop rate of the current block
group.
multigrid: A list of `int` or None. If specified, dilation rates for each
block is scaled up by its corresponding factor in the multigrid.
name: A `str` name for the block.
Returns:
The output `tf.Tensor` of the block layer.
"""
if
multigrid
is
not
None
and
len
(
multigrid
)
!=
block_repeats
:
raise
ValueError
(
'multigrid has to match number of block_repeats'
)
if
multigrid
is
None
:
multigrid
=
[
1
]
*
block_repeats
# TODO(arashwan): move striding at the of the block.
x
=
block_fn
(
filters
=
filters
,
strides
=
strides
,
dilation_rate
=
dilation_rate
*
multigrid
[
0
],
use_projection
=
True
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
se_ratio
=
self
.
_se_ratio
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
inputs
)
for
i
in
range
(
1
,
block_repeats
):
x
=
block_fn
(
filters
=
filters
,
strides
=
1
,
dilation_rate
=
dilation_rate
*
multigrid
[
i
],
use_projection
=
False
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
se_ratio
=
self
.
_se_ratio
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x
)
return
tf
.
identity
(
x
,
name
=
name
)
def
get_config
(
self
):
config_dict
=
{
'model_id'
:
self
.
_model_id
,
'output_stride'
:
self
.
_output_stride
,
'stem_type'
:
self
.
_stem_type
,
'se_ratio'
:
self
.
_se_ratio
,
'init_stochastic_depth_rate'
:
self
.
_init_stochastic_depth_rate
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
}
return
config_dict
@
classmethod
def
from_config
(
cls
,
config
,
custom_objects
=
None
):
return
cls
(
**
config
)
@
property
def
output_specs
(
self
):
"""A dict of {level: TensorShape} pairs for the model output."""
return
self
.
_output_specs
@
factory
.
register_backbone_builder
(
'dilated_resnet'
)
def
build_dilated_resnet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
# pytype: disable=annotation-type-mismatch # typed-keras
"""Builds ResNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'dilated_resnet'
,
(
f
'Inconsistent backbone type '
f
'
{
backbone_type
}
'
)
return
DilatedResNet
(
model_id
=
backbone_cfg
.
model_id
,
output_stride
=
backbone_cfg
.
output_stride
,
input_specs
=
input_specs
,
stem_type
=
backbone_cfg
.
stem_type
,
se_ratio
=
backbone_cfg
.
se_ratio
,
init_stochastic_depth_rate
=
backbone_cfg
.
stochastic_depth_drop_rate
,
multigrid
=
backbone_cfg
.
multigrid
,
last_stage_repeats
=
backbone_cfg
.
last_stage_repeats
,
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
,
kernel_regularizer
=
l2_regularizer
)
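
For reviewers skimming the diff, a minimal usage sketch of the new backbone (not part of this commit; it assumes the Model Garden `official` package is importable and mirrors the assertions in resnet_deeplab_test.py below):

# Usage sketch (not in this commit): build the DeepLab-style backbone at
# output stride 16 and inspect its endpoints.
import numpy as np
import tensorflow as tf

from official.vision.modeling.backbones import resnet_deeplab

backbone = resnet_deeplab.DilatedResNet(model_id=50, output_stride=16)
images = tf.keras.Input(shape=(512, 512, 3), batch_size=1)
endpoints = backbone(images)

# The dilated final stage is keyed by log2(output_stride): '4' for stride 16.
level = str(int(np.math.log2(16)))
print(endpoints[level].shape)  # (1, 32, 32, 2048) for ResNet-50.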
official/vision/modeling/backbones/resnet_deeplab_test.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for resnet_deeplab models."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.vision.modeling.backbones
import
resnet_deeplab
class
ResNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
50
,
4
,
8
),
(
128
,
101
,
4
,
8
),
(
128
,
50
,
4
,
16
),
(
128
,
101
,
4
,
16
),
)
def
test_network_creation
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
,
output_stride
):
"""Test creation of ResNet models."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
model_id
,
output_stride
=
output_stride
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
print
(
endpoints
)
self
.
assertAllEqual
([
1
,
input_size
/
output_stride
,
input_size
/
output_stride
,
512
*
endpoint_filter_scale
],
endpoints
[
str
(
int
(
np
.
math
.
log2
(
output_stride
)))].
shape
.
as_list
())
@
parameterized
.
parameters
(
(
'v0'
,
None
,
0.0
),
(
'v1'
,
None
,
0.0
),
(
'v1'
,
0.25
,
0.0
),
(
'v1'
,
0.25
,
0.2
),
)
def
test_network_features
(
self
,
stem_type
,
se_ratio
,
init_stochastic_depth_rate
):
"""Test additional features of ResNet models."""
input_size
=
128
model_id
=
50
endpoint_filter_scale
=
4
output_stride
=
8
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
model_id
,
output_stride
=
output_stride
,
stem_type
=
stem_type
,
se_ratio
=
se_ratio
,
init_stochastic_depth_rate
=
init_stochastic_depth_rate
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
print
(
endpoints
)
self
.
assertAllEqual
([
1
,
input_size
/
output_stride
,
input_size
/
output_stride
,
512
*
endpoint_filter_scale
],
endpoints
[
str
(
int
(
np
.
math
.
log2
(
output_stride
)))].
shape
.
as_list
())
@
combinations
.
generate
(
combinations
.
combine
(
strategy
=
[
strategy_combinations
.
cloud_tpu_strategy
,
strategy_combinations
.
one_device_strategy_gpu
,
],
use_sync_bn
=
[
False
,
True
],
))
def
test_sync_bn_multiple_devices
(
self
,
strategy
,
use_sync_bn
):
"""Test for sync bn on TPU and GPU devices."""
inputs
=
np
.
random
.
rand
(
64
,
128
,
128
,
3
)
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
with
strategy
.
scope
():
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
50
,
output_stride
=
8
,
use_sync_bn
=
use_sync_bn
)
_
=
network
(
inputs
)
@
parameterized
.
parameters
(
1
,
3
,
4
)
def
test_input_specs
(
self
,
input_dim
):
"""Test different input feature dimensions."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
input_dim
])
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
50
,
output_stride
=
8
,
input_specs
=
input_specs
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
128
,
128
,
input_dim
),
batch_size
=
1
)
_
=
network
(
inputs
)
def
test_serialize_deserialize
(
self
):
# Create a network object that sets all of its config options.
kwargs
=
dict
(
model_id
=
50
,
output_stride
=
8
,
stem_type
=
'v0'
,
se_ratio
=
0.25
,
init_stochastic_depth_rate
=
0.2
,
use_sync_bn
=
False
,
activation
=
'relu'
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
)
network
=
resnet_deeplab
.
DilatedResNet
(
**
kwargs
)
expected_config
=
dict
(
kwargs
)
self
.
assertEqual
(
network
.
get_config
(),
expected_config
)
# Create another network object from the first object's config.
new_network
=
resnet_deeplab
.
DilatedResNet
.
from_config
(
network
.
get_config
())
# Validate that the config can be forced to JSON.
_
=
new_network
.
to_json
()
# If the serialization was successful, the new config should match the old.
self
.
assertAllEqual
(
network
.
get_config
(),
new_network
.
get_config
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/modeling/backbones/resnet_test.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for resnet."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.vision.modeling.backbones
import
resnet
class
ResNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
10
,
1
),
(
128
,
18
,
1
),
(
128
,
34
,
1
),
(
128
,
50
,
4
),
(
128
,
101
,
4
),
(
128
,
152
,
4
),
)
def
test_network_creation
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
):
"""Test creation of ResNet family models."""
resnet_params
=
{
10
:
4915904
,
18
:
11190464
,
34
:
21306048
,
50
:
23561152
,
101
:
42605504
,
152
:
58295232
,
}
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet
.
ResNet
(
model_id
=
model_id
)
self
.
assertEqual
(
network
.
count_params
(),
resnet_params
[
model_id
])
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
2
,
input_size
/
2
**
2
,
64
*
endpoint_filter_scale
],
endpoints
[
'2'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
3
,
input_size
/
2
**
3
,
128
*
endpoint_filter_scale
],
endpoints
[
'3'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
4
,
input_size
/
2
**
4
,
256
*
endpoint_filter_scale
],
endpoints
[
'4'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
5
,
input_size
/
2
**
5
,
512
*
endpoint_filter_scale
],
endpoints
[
'5'
].
shape
.
as_list
())
@
combinations
.
generate
(
combinations
.
combine
(
strategy
=
[
strategy_combinations
.
cloud_tpu_strategy
,
strategy_combinations
.
one_device_strategy_gpu
,
],
use_sync_bn
=
[
False
,
True
],
))
def
test_sync_bn_multiple_devices
(
self
,
strategy
,
use_sync_bn
):
"""Test for sync bn on TPU and GPU devices."""
inputs
=
np
.
random
.
rand
(
64
,
128
,
128
,
3
)
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
with
strategy
.
scope
():
network
=
resnet
.
ResNet
(
model_id
=
50
,
use_sync_bn
=
use_sync_bn
)
_
=
network
(
inputs
)
@
parameterized
.
parameters
(
(
128
,
34
,
1
,
'v0'
,
None
,
0.0
,
1.0
,
False
,
False
),
(
128
,
34
,
1
,
'v1'
,
0.25
,
0.2
,
1.25
,
True
,
True
),
(
128
,
50
,
4
,
'v0'
,
None
,
0.0
,
1.5
,
False
,
False
),
(
128
,
50
,
4
,
'v1'
,
0.25
,
0.2
,
2.0
,
True
,
True
),
)
def
test_resnet_rs
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
,
stem_type
,
se_ratio
,
init_stochastic_depth_rate
,
depth_multiplier
,
resnetd_shortcut
,
replace_stem_max_pool
):
"""Test creation of ResNet family models."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet
.
ResNet
(
model_id
=
model_id
,
depth_multiplier
=
depth_multiplier
,
stem_type
=
stem_type
,
resnetd_shortcut
=
resnetd_shortcut
,
replace_stem_max_pool
=
replace_stem_max_pool
,
se_ratio
=
se_ratio
,
init_stochastic_depth_rate
=
init_stochastic_depth_rate
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
_
=
network
(
inputs
)
@
parameterized
.
parameters
(
1
,
3
,
4
)
def
test_input_specs
(
self
,
input_dim
):
"""Test different input feature dimensions."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
input_dim
])
network
=
resnet
.
ResNet
(
model_id
=
50
,
input_specs
=
input_specs
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
128
,
128
,
input_dim
),
batch_size
=
1
)
_
=
network
(
inputs
)
def
test_serialize_deserialize
(
self
):
# Create a network object that sets all of its config options.
kwargs
=
dict
(
model_id
=
50
,
depth_multiplier
=
1.0
,
stem_type
=
'v0'
,
se_ratio
=
None
,
resnetd_shortcut
=
False
,
replace_stem_max_pool
=
False
,
init_stochastic_depth_rate
=
0.0
,
scale_stem
=
True
,
use_sync_bn
=
False
,
activation
=
'relu'
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bn_trainable
=
True
)
network
=
resnet
.
ResNet
(
**
kwargs
)
expected_config
=
dict
(
kwargs
)
self
.
assertEqual
(
network
.
get_config
(),
expected_config
)
# Create another network object from the first object's config.
new_network
=
resnet
.
ResNet
.
from_config
(
network
.
get_config
())
# Validate that the config can be forced to JSON.
_
=
new_network
.
to_json
()
# If the serialization was successful, the new config should match the old.
self
.
assertAllEqual
(
network
.
get_config
(),
new_network
.
get_config
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
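
A sketch of a ResNet-RS-style configuration using the options exercised by `test_resnet_rs` above (not part of this commit; `resnet.py` itself is not in this page's file list, so the argument names are taken from the test call):

# Usage sketch (not in this commit): ResNet-RS style options on ResNet-50.
import tensorflow as tf

from official.vision.modeling.backbones import resnet

network = resnet.ResNet(
    model_id=50,
    depth_multiplier=1.0,
    stem_type='v1',               # three 3x3 stem convs instead of one 7x7.
    resnetd_shortcut=True,
    replace_stem_max_pool=True,
    se_ratio=0.25,
    init_stochastic_depth_rate=0.2)
inputs = tf.keras.Input(shape=(128, 128, 3), batch_size=1)
endpoints = network(inputs)  # dict keyed '2'..'5', strides 4 to 32.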
official/vision/modeling/backbones/revnet.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions of RevNet."""
from
typing
import
Any
,
Callable
,
Dict
,
Optional
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.backbones
import
factory
from
official.vision.modeling.layers
import
nn_blocks
# Specifications for different RevNet variants.
# Each entry specifies block configurations of the particular RevNet variant.
# Each element in the block configuration is in the following format:
# (block_fn, num_filters, block_repeats)
REVNET_SPECS
=
{
38
:
[
(
'residual'
,
32
,
3
),
(
'residual'
,
64
,
3
),
(
'residual'
,
112
,
3
),
],
56
:
[
(
'bottleneck'
,
128
,
2
),
(
'bottleneck'
,
256
,
2
),
(
'bottleneck'
,
512
,
3
),
(
'bottleneck'
,
832
,
2
),
],
104
:
[
(
'bottleneck'
,
128
,
2
),
(
'bottleneck'
,
256
,
2
),
(
'bottleneck'
,
512
,
11
),
(
'bottleneck'
,
832
,
2
),
],
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
RevNet
(
tf
.
keras
.
Model
):
"""Creates a Reversible ResNet (RevNet) family model.
This implements:
Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
The Reversible Residual Network: Backpropagation Without Storing
Activations.
(https://arxiv.org/pdf/1707.04585.pdf)
"""
def
__init__
(
self
,
model_id
:
int
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a RevNet model.
Args:
model_id: An `int` of depth/id of ResNet backbone model.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_input_specs
=
input_specs
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
if
use_sync_bn
:
self
.
_norm
=
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
tf
.
keras
.
layers
.
BatchNormalization
axis
=
-
1
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
else
1
# Build RevNet.
inputs
=
tf
.
keras
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
x
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
REVNET_SPECS
[
model_id
][
0
][
1
],
kernel_size
=
7
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
x
=
tf
.
keras
.
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'same'
)(
x
)
endpoints
=
{}
for
i
,
spec
in
enumerate
(
REVNET_SPECS
[
model_id
]):
if
spec
[
0
]
==
'residual'
:
inner_block_fn
=
nn_blocks
.
ResidualInner
elif
spec
[
0
]
==
'bottleneck'
:
inner_block_fn
=
nn_blocks
.
BottleneckResidualInner
else
:
raise
ValueError
(
'Block fn `{}` is not supported.'
.
format
(
spec
[
0
]))
if
spec
[
1
]
%
2
!=
0
:
raise
ValueError
(
'Number of output filters must be even to ensure '
'splitting in channel dimension for reversible blocks'
)
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
spec
[
1
],
strides
=
(
1
if
i
==
0
else
2
),
inner_block_fn
=
inner_block_fn
,
block_repeats
=
spec
[
2
],
batch_norm_first
=
(
i
!=
0
),
# Only skip on first block
name
=
'revblock_group_{}'
.
format
(
i
+
2
))
endpoints
[
str
(
i
+
2
)]
=
x
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
}
super
(
RevNet
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
endpoints
,
**
kwargs
)
def
_block_group
(
self
,
inputs
:
tf
.
Tensor
,
filters
:
int
,
strides
:
int
,
inner_block_fn
:
Callable
[...,
tf
.
keras
.
layers
.
Layer
],
block_repeats
:
int
,
batch_norm_first
:
bool
,
name
:
str
=
'revblock_group'
)
->
tf
.
Tensor
:
"""Creates one reversible block for RevNet model.
Args:
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` number of filters for the first convolution of the
layer.
strides: An `int` stride to use for the first convolution of the layer. If
greater than 1, this block group will downsample the input.
inner_block_fn: Either `nn_blocks.ResidualInner` or
`nn_blocks.BottleneckResidualInner`.
block_repeats: An `int` number of blocks contained in this block group.
batch_norm_first: A `bool` that specifies whether to apply
BatchNormalization and activation layer before feeding into convolution
layers.
name: A `str` name for the block.
Returns:
The output `tf.Tensor` of the block layer.
"""
x
=
inputs
for
i
in
range
(
block_repeats
):
is_first_block
=
i
==
0
# Only first residual layer in block gets downsampled
curr_strides
=
strides
if
is_first_block
else
1
f
=
inner_block_fn
(
filters
=
filters
//
2
,
strides
=
curr_strides
,
batch_norm_first
=
batch_norm_first
and
is_first_block
,
kernel_regularizer
=
self
.
_kernel_regularizer
)
g
=
inner_block_fn
(
filters
=
filters
//
2
,
strides
=
1
,
batch_norm_first
=
batch_norm_first
and
is_first_block
,
kernel_regularizer
=
self
.
_kernel_regularizer
)
x
=
nn_blocks
.
ReversibleLayer
(
f
,
g
)(
x
)
return
tf
.
identity
(
x
,
name
=
name
)
def
get_config
(
self
)
->
Dict
[
str
,
Any
]:
config_dict
=
{
'model_id'
:
self
.
_model_id
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
}
return
config_dict
@
classmethod
def
from_config
(
cls
,
config
:
Dict
[
str
,
Any
],
custom_objects
:
Optional
[
Any
]
=
None
)
->
tf
.
keras
.
Model
:
return
cls
(
**
config
)
@
property
def
output_specs
(
self
)
->
Dict
[
int
,
tf
.
TensorShape
]:
"""A dict of {level: TensorShape} pairs for the model output."""
return
self
.
_output_specs
@
factory
.
register_backbone_builder
(
'revnet'
)
def
build_revnet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
# pytype: disable=annotation-type-mismatch # typed-keras
"""Builds RevNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'revnet'
,
(
f
'Inconsistent backbone type '
f
'
{
backbone_type
}
'
)
return
RevNet
(
model_id
=
backbone_cfg
.
model_id
,
input_specs
=
input_specs
,
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
,
kernel_regularizer
=
l2_regularizer
)
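
A minimal usage sketch for the new RevNet backbone (not part of this commit; assumes the Model Garden `official` package is importable):

# Usage sketch (not in this commit): build RevNet-56 and inspect endpoints.
import tensorflow as tf

from official.vision.modeling.backbones import revnet

network = revnet.RevNet(model_id=56)
inputs = tf.keras.Input(shape=(128, 128, 3), batch_size=1)
endpoints = network(inputs)
# Levels '2'..'5' at strides 4, 8, 16, 32; channel counts follow
# REVNET_SPECS[56] times the bottleneck expansion (see revnet_test.py below).
for level, feat in sorted(endpoints.items()):
  print(level, feat.shape)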
official/vision/modeling/backbones/revnet_test.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for RevNet."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.backbones
import
revnet
class
RevNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
56
,
4
),
(
128
,
104
,
4
),
)
def
test_network_creation
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
):
"""Test creation of RevNet family models."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
revnet
.
RevNet
(
model_id
=
model_id
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
network
.
summary
()
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
2
,
input_size
/
2
**
2
,
128
*
endpoint_filter_scale
],
endpoints
[
'2'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
3
,
input_size
/
2
**
3
,
256
*
endpoint_filter_scale
],
endpoints
[
'3'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
4
,
input_size
/
2
**
4
,
512
*
endpoint_filter_scale
],
endpoints
[
'4'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
5
,
input_size
/
2
**
5
,
832
*
endpoint_filter_scale
],
endpoints
[
'5'
].
shape
.
as_list
())
@
parameterized
.
parameters
(
1
,
3
,
4
)
def
test_input_specs
(
self
,
input_dim
):
"""Test different input feature dimensions."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
input_dim
])
network
=
revnet
.
RevNet
(
model_id
=
56
,
input_specs
=
input_specs
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
128
,
128
,
input_dim
),
batch_size
=
1
)
_
=
network
(
inputs
)
def
test_serialize_deserialize
(
self
):
# Create a network object that sets all of its config options.
kwargs
=
dict
(
model_id
=
56
,
activation
=
'relu'
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
)
network
=
revnet
.
RevNet
(
**
kwargs
)
expected_config
=
dict
(
kwargs
)
self
.
assertEqual
(
network
.
get_config
(),
expected_config
)
# Create another network object from the first object's config.
new_network
=
revnet
.
RevNet
.
from_config
(
network
.
get_config
())
# Validate that the config can be forced to JSON.
_
=
new_network
.
to_json
()
# If the serialization was successful, the new config should match the old.
self
.
assertAllEqual
(
network
.
get_config
(),
new_network
.
get_config
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/modeling/backbones/spinenet.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of SpineNet Networks."""
import
math
from
typing
import
Any
,
List
,
Optional
,
Tuple
# Import libraries
from
absl
import
logging
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.backbones
import
factory
from
official.vision.modeling.layers
import
nn_blocks
from
official.vision.modeling.layers
import
nn_layers
from
official.vision.ops
import
spatial_transform_ops
layers
=
tf
.
keras
.
layers
FILTER_SIZE_MAP
=
{
1
:
32
,
2
:
64
,
3
:
128
,
4
:
256
,
5
:
256
,
6
:
256
,
7
:
256
,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS
=
[
(
2
,
'bottleneck'
,
(
0
,
1
),
False
),
(
4
,
'residual'
,
(
0
,
1
),
False
),
(
3
,
'bottleneck'
,
(
2
,
3
),
False
),
(
4
,
'bottleneck'
,
(
2
,
4
),
False
),
(
6
,
'residual'
,
(
3
,
5
),
False
),
(
4
,
'bottleneck'
,
(
3
,
5
),
False
),
(
5
,
'residual'
,
(
6
,
7
),
False
),
(
7
,
'residual'
,
(
6
,
8
),
False
),
(
5
,
'bottleneck'
,
(
8
,
9
),
False
),
(
5
,
'bottleneck'
,
(
8
,
10
),
False
),
(
4
,
'bottleneck'
,
(
5
,
10
),
True
),
(
3
,
'bottleneck'
,
(
4
,
10
),
True
),
(
5
,
'bottleneck'
,
(
7
,
12
),
True
),
(
7
,
'bottleneck'
,
(
5
,
14
),
True
),
(
6
,
'bottleneck'
,
(
12
,
14
),
True
),
(
2
,
'bottleneck'
,
(
2
,
13
),
True
),
]
SCALING_MAP
=
{
'49S'
:
{
'endpoints_num_filters'
:
128
,
'filter_size_scale'
:
0.65
,
'resample_alpha'
:
0.5
,
'block_repeats'
:
1
,
},
'49'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.0
,
'resample_alpha'
:
0.5
,
'block_repeats'
:
1
,
},
'96'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.0
,
'resample_alpha'
:
0.5
,
'block_repeats'
:
2
,
},
'143'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.0
,
'resample_alpha'
:
1.0
,
'block_repeats'
:
3
,
},
# SpineNet-143 with 1.3x filter_size_scale.
'143L'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.3
,
'resample_alpha'
:
1.0
,
'block_repeats'
:
3
,
},
'190'
:
{
'endpoints_num_filters'
:
512
,
'filter_size_scale'
:
1.3
,
'resample_alpha'
:
1.0
,
'block_repeats'
:
4
,
},
}
class
BlockSpec
(
object
):
"""A container class that specifies the block configuration for SpineNet."""
def
__init__
(
self
,
level
:
int
,
block_fn
:
str
,
input_offsets
:
Tuple
[
int
,
int
],
is_output
:
bool
):
self
.
level
=
level
self
.
block_fn
=
block_fn
self
.
input_offsets
=
input_offsets
self
.
is_output
=
is_output
def
build_block_specs
(
block_specs
:
Optional
[
List
[
Tuple
[
Any
,
...]]]
=
None
)
->
List
[
BlockSpec
]:
"""Builds the list of BlockSpec objects for SpineNet."""
if
not
block_specs
:
block_specs
=
SPINENET_BLOCK_SPECS
logging
.
info
(
'Building SpineNet block specs: %s'
,
block_specs
)
return
[
BlockSpec
(
*
b
)
for
b
in
block_specs
]
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
SpineNet
(
tf
.
keras
.
Model
):
"""Creates a SpineNet family model.
This implements:
Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
Yin Cui, Quoc V. Le, Xiaodan Song.
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization.
(https://arxiv.org/abs/1912.05027)
"""
def
__init__
(
self
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
min_level
:
int
=
3
,
max_level
:
int
=
7
,
block_specs
:
List
[
BlockSpec
]
=
build_block_specs
(),
endpoints_num_filters
:
int
=
256
,
resample_alpha
:
float
=
0.5
,
block_repeats
:
int
=
1
,
filter_size_scale
:
float
=
1.0
,
init_stochastic_depth_rate
:
float
=
0.0
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
**
kwargs
):
"""Initializes a SpineNet model.
Args:
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
min_level: An `int` of min level for output mutiscale features.
max_level: An `int` of max level for output mutiscale features.
block_specs: A list of block specifications for the SpineNet model
discovered by NAS.
endpoints_num_filters: An `int` of feature dimension for the output
endpoints.
resample_alpha: A `float` of resampling factor in cross-scale connections.
block_repeats: An `int` of number of blocks contained in the layer.
filter_size_scale: A `float` of multiplier for the filters (number of
channels) for all convolution ops. The value must be greater than zero.
Typical usage will be to set this value in (0, 1) to reduce the number
of parameters or computation cost of the model.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_input_specs
=
input_specs
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
self
.
_block_specs
=
block_specs
self
.
_endpoints_num_filters
=
endpoints_num_filters
self
.
_resample_alpha
=
resample_alpha
self
.
_block_repeats
=
block_repeats
self
.
_filter_size_scale
=
filter_size_scale
self
.
_init_stochastic_depth_rate
=
init_stochastic_depth_rate
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_activation
=
activation
self
.
_use_sync_bn
=
use_sync_bn
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
if
activation
==
'relu'
:
self
.
_activation_fn
=
tf
.
nn
.
relu
elif
activation
==
'swish'
:
self
.
_activation_fn
=
tf
.
nn
.
swish
else
:
raise
ValueError
(
'Activation {} not implemented.'
.
format
(
activation
))
self
.
_init_block_fn
=
'bottleneck'
self
.
_num_init_blocks
=
2
if
use_sync_bn
:
self
.
_norm
=
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
layers
.
BatchNormalization
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
self
.
_bn_axis
=
-
1
else
:
self
.
_bn_axis
=
1
# Build SpineNet.
inputs
=
tf
.
keras
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
net
=
self
.
_build_stem
(
inputs
=
inputs
)
input_width
=
input_specs
.
shape
[
2
]
if
input_width
is
None
:
max_stride
=
max
(
map
(
lambda
b
:
b
.
level
,
block_specs
))
input_width
=
2
**
max_stride
net
=
self
.
_build_scale_permuted_network
(
net
=
net
,
input_width
=
input_width
)
endpoints
=
self
.
_build_endpoints
(
net
=
net
)
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
}
super
(
SpineNet
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
endpoints
)
def
_block_group
(
self
,
inputs
:
tf
.
Tensor
,
filters
:
int
,
strides
:
int
,
block_fn_cand
:
str
,
block_repeats
:
int
=
1
,
stochastic_depth_drop_rate
:
Optional
[
float
]
=
None
,
name
:
str
=
'block_group'
):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates
=
{
'bottleneck'
:
nn_blocks
.
BottleneckBlock
,
'residual'
:
nn_blocks
.
ResidualBlock
,
}
block_fn
=
block_fn_candidates
[
block_fn_cand
]
_
,
_
,
_
,
num_filters
=
inputs
.
get_shape
().
as_list
()
if
block_fn_cand
==
'bottleneck'
:
use_projection
=
not
(
num_filters
==
(
filters
*
4
)
and
strides
==
1
)
else
:
use_projection
=
not
(
num_filters
==
filters
and
strides
==
1
)
x
=
block_fn
(
filters
=
filters
,
strides
=
strides
,
use_projection
=
use_projection
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
inputs
)
for
_
in
range
(
1
,
block_repeats
):
x
=
block_fn
(
filters
=
filters
,
strides
=
1
,
use_projection
=
False
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x
)
return
tf
.
identity
(
x
,
name
=
name
)
def
_build_stem
(
self
,
inputs
):
"""Builds SpineNet stem."""
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
7
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
x
=
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'same'
)(
x
)
net
=
[]
# Build the initial level 2 blocks.
for
i
in
range
(
self
.
_num_init_blocks
):
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
int
(
FILTER_SIZE_MAP
[
2
]
*
self
.
_filter_size_scale
),
strides
=
1
,
block_fn_cand
=
self
.
_init_block_fn
,
block_repeats
=
self
.
_block_repeats
,
name
=
'stem_block_{}'
.
format
(
i
+
1
))
net
.
append
(
x
)
return
net
def
_build_scale_permuted_network
(
self
,
net
,
input_width
,
weighted_fusion
=
False
):
"""Builds scale-permuted network."""
net_sizes
=
[
int
(
math
.
ceil
(
input_width
/
2
**
2
))]
*
len
(
net
)
net_block_fns
=
[
self
.
_init_block_fn
]
*
len
(
net
)
num_outgoing_connections
=
[
0
]
*
len
(
net
)
endpoints
=
{}
for
i
,
block_spec
in
enumerate
(
self
.
_block_specs
):
# Find out specs for the target block.
target_width
=
int
(
math
.
ceil
(
input_width
/
2
**
block_spec
.
level
))
target_num_filters
=
int
(
FILTER_SIZE_MAP
[
block_spec
.
level
]
*
self
.
_filter_size_scale
)
target_block_fn
=
block_spec
.
block_fn
# Resample then merge input0 and input1.
parents
=
[]
input0
=
block_spec
.
input_offsets
[
0
]
input1
=
block_spec
.
input_offsets
[
1
]
x0
=
self
.
_resample_with_alpha
(
inputs
=
net
[
input0
],
input_width
=
net_sizes
[
input0
],
input_block_fn
=
net_block_fns
[
input0
],
target_width
=
target_width
,
target_num_filters
=
target_num_filters
,
target_block_fn
=
target_block_fn
,
alpha
=
self
.
_resample_alpha
)
parents
.
append
(
x0
)
num_outgoing_connections
[
input0
]
+=
1
x1
=
self
.
_resample_with_alpha
(
inputs
=
net
[
input1
],
input_width
=
net_sizes
[
input1
],
input_block_fn
=
net_block_fns
[
input1
],
target_width
=
target_width
,
target_num_filters
=
target_num_filters
,
target_block_fn
=
target_block_fn
,
alpha
=
self
.
_resample_alpha
)
parents
.
append
(
x1
)
num_outgoing_connections
[
input1
]
+=
1
# Merge 0 outdegree blocks to the output block.
if
block_spec
.
is_output
:
for
j
,
(
j_feat
,
j_connections
)
in
enumerate
(
zip
(
net
,
num_outgoing_connections
)):
if
j_connections
==
0
and
(
j_feat
.
shape
[
2
]
==
target_width
and
j_feat
.
shape
[
3
]
==
x0
.
shape
[
3
]):
parents
.
append
(
j_feat
)
num_outgoing_connections
[
j
]
+=
1
# pylint: disable=g-direct-tensorflow-import
if
weighted_fusion
:
dtype
=
parents
[
0
].
dtype
parent_weights
=
[
tf
.
nn
.
relu
(
tf
.
cast
(
tf
.
Variable
(
1.0
,
name
=
'block{}_fusion{}'
.
format
(
i
,
j
)),
dtype
=
dtype
))
for
j
in
range
(
len
(
parents
))]
weights_sum
=
tf
.
add_n
(
parent_weights
)
parents
=
[
parents
[
i
]
*
parent_weights
[
i
]
/
(
weights_sum
+
0.0001
)
for
i
in
range
(
len
(
parents
))
]
# Fuse all parent nodes then build a new block.
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
tf
.
add_n
(
parents
))
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
target_num_filters
,
strides
=
1
,
block_fn_cand
=
target_block_fn
,
block_repeats
=
self
.
_block_repeats
,
stochastic_depth_drop_rate
=
nn_layers
.
get_stochastic_depth_rate
(
self
.
_init_stochastic_depth_rate
,
i
+
1
,
len
(
self
.
_block_specs
)),
name
=
'scale_permuted_block_{}'
.
format
(
i
+
1
))
net
.
append
(
x
)
net_sizes
.
append
(
target_width
)
net_block_fns
.
append
(
target_block_fn
)
num_outgoing_connections
.
append
(
0
)
# Save output feats.
if
block_spec
.
is_output
:
if
block_spec
.
level
in
endpoints
:
raise
ValueError
(
'Duplicate feats found for output level {}.'
.
format
(
block_spec
.
level
))
if
(
block_spec
.
level
<
self
.
_min_level
or
block_spec
.
level
>
self
.
_max_level
):
logging
.
warning
(
'SpineNet output level out of range [min_level, max_level] = '
'[%s, %s] will not be used for further processing.'
,
self
.
_min_level
,
self
.
_max_level
)
endpoints
[
str
(
block_spec
.
level
)]
=
x
return
endpoints
def
_build_endpoints
(
self
,
net
):
"""Matches filter size for endpoints before sharing conv layers."""
endpoints
=
{}
for
level
in
range
(
self
.
_min_level
,
self
.
_max_level
+
1
):
x
=
layers
.
Conv2D
(
filters
=
self
.
_endpoints_num_filters
,
kernel_size
=
1
,
strides
=
1
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
net
[
str
(
level
)])
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
endpoints
[
str
(
level
)]
=
x
return
endpoints
def
_resample_with_alpha
(
self
,
inputs
,
input_width
,
input_block_fn
,
target_width
,
target_num_filters
,
target_block_fn
,
alpha
=
0.5
):
"""Matches resolution and feature dimension."""
_
,
_
,
_
,
input_num_filters
=
inputs
.
get_shape
().
as_list
()
if
input_block_fn
==
'bottleneck'
:
input_num_filters
/=
4
new_num_filters
=
int
(
input_num_filters
*
alpha
)
x
=
layers
.
Conv2D
(
filters
=
new_num_filters
,
kernel_size
=
1
,
strides
=
1
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
# Spatial resampling.
if
input_width
>
target_width
:
x
=
layers
.
Conv2D
(
filters
=
new_num_filters
,
kernel_size
=
3
,
strides
=
2
,
padding
=
'SAME'
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
input_width
/=
2
while
input_width
>
target_width
:
x
=
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'SAME'
)(
x
)
input_width
/=
2
elif
input_width
<
target_width
:
scale
=
target_width
//
input_width
x
=
spatial_transform_ops
.
nearest_upsampling
(
x
,
scale
=
scale
)
# Last 1x1 conv to match filter size.
if
target_block_fn
==
'bottleneck'
:
target_num_filters
*=
4
x
=
layers
.
Conv2D
(
filters
=
target_num_filters
,
kernel_size
=
1
,
strides
=
1
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
return
x
def
get_config
(
self
):
config_dict
=
{
'min_level'
:
self
.
_min_level
,
'max_level'
:
self
.
_max_level
,
'endpoints_num_filters'
:
self
.
_endpoints_num_filters
,
'resample_alpha'
:
self
.
_resample_alpha
,
'block_repeats'
:
self
.
_block_repeats
,
'filter_size_scale'
:
self
.
_filter_size_scale
,
'init_stochastic_depth_rate'
:
self
.
_init_stochastic_depth_rate
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
}
return
config_dict
@
classmethod
def
from_config
(
cls
,
config
,
custom_objects
=
None
):
return
cls
(
**
config
)
@
property
def
output_specs
(
self
):
"""A dict of {level: TensorShape} pairs for the model output."""
return
self
.
_output_specs
@
factory
.
register_backbone_builder
(
'spinenet'
)
def
build_spinenet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds SpineNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'spinenet'
,
(
f
'Inconsistent backbone type '
f
'
{
backbone_type
}
'
)
model_id
=
backbone_cfg
.
model_id
if
model_id
not
in
SCALING_MAP
:
raise
ValueError
(
'SpineNet-{} is not a valid architecture.'
.
format
(
model_id
))
scaling_params
=
SCALING_MAP
[
model_id
]
return
SpineNet
(
input_specs
=
input_specs
,
min_level
=
backbone_cfg
.
min_level
,
max_level
=
backbone_cfg
.
max_level
,
endpoints_num_filters
=
scaling_params
[
'endpoints_num_filters'
],
resample_alpha
=
scaling_params
[
'resample_alpha'
],
block_repeats
=
scaling_params
[
'block_repeats'
],
filter_size_scale
=
scaling_params
[
'filter_size_scale'
],
init_stochastic_depth_rate
=
backbone_cfg
.
stochastic_depth_drop_rate
,
kernel_regularizer
=
l2_regularizer
,
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
)
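
A minimal usage sketch showing how the `SCALING_MAP` entries feed the `SpineNet` constructor (not part of this commit; `build_spinenet` above does the same wiring from a `hyperparams.Config`):

# Usage sketch (not in this commit): SpineNet-49 with its published scaling
# parameters, producing multiscale endpoints for levels 3..7.
import tensorflow as tf

from official.vision.modeling.backbones import spinenet

params = spinenet.SCALING_MAP['49']
network = spinenet.SpineNet(
    input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
    min_level=3,
    max_level=7,
    endpoints_num_filters=params['endpoints_num_filters'],
    resample_alpha=params['resample_alpha'],
    block_repeats=params['block_repeats'],
    filter_size_scale=params['filter_size_scale'])
inputs = tf.keras.Input(shape=(640, 640, 3), batch_size=1)
endpoints = network(inputs)
# One feature map per level '3'..'7', all with endpoints_num_filters channels.
for level in sorted(endpoints):
  print(level, endpoints[level].shape)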
official/vision/modeling/backbones/spinenet_mobile.py  (new file, 0 → 100644, at 0225b135; listing truncated on this page)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions of Mobile SpineNet Networks."""
import math
from typing import Any, List, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.backbones import factory
from official.vision.modeling.layers import nn_blocks
from official.vision.modeling.layers import nn_layers
from official.vision.ops import spatial_transform_ops

layers = tf.keras.layers

FILTER_SIZE_MAP = {
    0: 8,
    1: 16,
    2: 24,
    3: 40,
    4: 80,
    5: 112,
    6: 112,
    7: 112,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
    (2, 'mbconv', (0, 1), False),
    (2, 'mbconv', (1, 2), False),
    (4, 'mbconv', (1, 2), False),
    (3, 'mbconv', (3, 4), False),
    (4, 'mbconv', (3, 5), False),
    (6, 'mbconv', (4, 6), False),
    (4, 'mbconv', (4, 6), False),
    (5, 'mbconv', (7, 8), False),
    (7, 'mbconv', (7, 9), False),
    (5, 'mbconv', (9, 10), False),
    (5, 'mbconv', (9, 11), False),
    (4, 'mbconv', (6, 11), True),
    (3, 'mbconv', (5, 11), True),
    (5, 'mbconv', (8, 13), True),
    (7, 'mbconv', (6, 15), True),
    (6, 'mbconv', (13, 15), True),
]

SCALING_MAP = {
    '49': {
        'endpoints_num_filters': 48,
        'filter_size_scale': 1.0,
        'block_repeats': 1,
    },
    '49S': {
        'endpoints_num_filters': 40,
        'filter_size_scale': 0.65,
        'block_repeats': 1,
    },
    '49XS': {
        'endpoints_num_filters': 24,
        'filter_size_scale': 0.6,
        'block_repeats': 1,
    },
}


class BlockSpec(object):
  """A container class that specifies the block configuration for SpineNet."""

  def __init__(self, level: int, block_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level = level
    self.block_fn = block_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for SpineNet."""
  if not block_specs:
    block_specs = SPINENET_BLOCK_SPECS
  logging.info('Building SpineNet block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNetMobile(tf.keras.Model):
  """Creates a Mobile SpineNet family model.

  This implements:
  [1] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
      Yin Cui, Quoc V. Le, Xiaodan Song.
      SpineNet: Learning Scale-Permuted Backbone for Recognition and
      Localization.
      (https://arxiv.org/abs/1912.05027).
  [2] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Yin Cui, Mingxing Tan,
      Quoc Le, Xiaodan Song.
      Efficient Scale-Permuted Backbone with Learned Resource Distribution.
      (https://arxiv.org/abs/2010.11426).
  """

  def __init__(
      self,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      min_level: int = 3,
      max_level: int = 7,
      block_specs: List[BlockSpec] = build_block_specs(),
      endpoints_num_filters: int = 256,
      se_ratio: float = 0.2,
      block_repeats: int = 1,
      filter_size_scale: float = 1.0,
      expand_ratio: int = 6,
      init_stochastic_depth_rate=0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      use_keras_upsampling_2d: bool = False,
      **kwargs):
    """Initializes a Mobile SpineNet model.

    Args:
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      min_level: An `int` of min level for output multiscale features.
      max_level: An `int` of max level for output multiscale features.
      block_specs: The block specifications for the SpineNet model discovered
        by NAS.
      endpoints_num_filters: An `int` of feature dimension for the output
        endpoints.
      se_ratio: A `float` of Squeeze-and-Excitation ratio.
      block_repeats: An `int` of number of blocks contained in the layer.
      filter_size_scale: A `float` of multiplier for the filters (number of
        channels) for all convolution ops. The value must be greater than
        zero. Typical usage will be to set this value in (0, 1) to reduce the
        number of parameters or computation cost of the model.
      expand_ratio: An `int` of expansion ratio for inverted bottleneck
        blocks.
      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
      kernel_initializer: A `str` of kernel initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A small `float` added to variance to avoid dividing by
        zero.
      use_keras_upsampling_2d: If True, use the Keras UpSampling2D layer.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._input_specs = input_specs
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = endpoints_num_filters
    self._se_ratio = se_ratio
    self._block_repeats = block_repeats
    self._filter_size_scale = filter_size_scale
    self._expand_ratio = expand_ratio
    self._init_stochastic_depth_rate = init_stochastic_depth_rate
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._use_keras_upsampling_2d = use_keras_upsampling_2d
    self._num_init_blocks = 2

    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

    # Build SpineNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])

    net = self._build_stem(inputs=inputs)
    input_width = input_specs.shape[2]
    if input_width is None:
      max_stride = max(map(lambda b: b.level, block_specs))
      input_width = 2 ** max_stride
    net = self._build_scale_permuted_network(net=net, input_width=input_width)
    endpoints = self._build_endpoints(net=net)

    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

    super().__init__(inputs=inputs, outputs=endpoints)
  def _block_group(self,
                   inputs: tf.Tensor,
                   in_filters: int,
                   out_filters: int,
                   strides: int,
                   expand_ratio: int = 6,
                   block_repeats: int = 1,
                   se_ratio: float = 0.2,
                   stochastic_depth_drop_rate: Optional[float] = None,
                   name: str = 'block_group'):
    """Creates one group of blocks for the SpineNet model."""
    x = nn_blocks.InvertedBottleneckBlock(
        in_filters=in_filters,
        out_filters=out_filters,
        strides=strides,
        se_ratio=se_ratio,
        expand_ratio=expand_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)(inputs)
    for _ in range(1, block_repeats):
      # Chain each repeated block on the previous block's output.
      x = nn_blocks.InvertedBottleneckBlock(
          in_filters=in_filters,
          out_filters=out_filters,
          strides=1,
          se_ratio=se_ratio,
          expand_ratio=expand_ratio,
          stochastic_depth_drop_rate=stochastic_depth_drop_rate,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(x)

    return tf.keras.layers.Activation('linear', name=name)(x)
  def _build_stem(self, inputs):
    """Builds SpineNet stem."""
    x = layers.Conv2D(
        filters=int(FILTER_SIZE_MAP[0] * self._filter_size_scale),
        kernel_size=3,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(x)
    x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)

    net = []
    stem_strides = [1, 2]
    # Build the initial level 2 blocks.
    for i in range(self._num_init_blocks):
      x = self._block_group(
          inputs=x,
          in_filters=int(FILTER_SIZE_MAP[i] * self._filter_size_scale),
          out_filters=int(FILTER_SIZE_MAP[i + 1] * self._filter_size_scale),
          expand_ratio=self._expand_ratio,
          strides=stem_strides[i],
          se_ratio=self._se_ratio,
          block_repeats=self._block_repeats,
          name='stem_block_{}'.format(i + 1))
      net.append(x)
    return net

  def _build_scale_permuted_network(self, net, input_width,
                                    weighted_fusion=False):
    """Builds scale-permuted network."""
    net_sizes = [
        int(math.ceil(input_width / 2)),
        int(math.ceil(input_width / 2 ** 2))
    ]
    num_outgoing_connections = [0] * len(net)

    endpoints = {}
    for i, block_spec in enumerate(self._block_specs):
      # Update block level if it is larger than max_level to avoid building
      # blocks smaller than requested.
      block_spec.level = min(block_spec.level, self._max_level)
      # Find out specs for the target block.
      target_width = int(math.ceil(input_width / 2 ** block_spec.level))
      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                               self._filter_size_scale)

      # Resample then merge input0 and input1.
      parents = []
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      x0 = self._resample_with_sepconv(
          inputs=net[input0],
          input_width=net_sizes[input0],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(x0)
      num_outgoing_connections[input0] += 1

      x1 = self._resample_with_sepconv(
          inputs=net[input1],
          input_width=net_sizes[input1],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(x1)
      num_outgoing_connections[input1] += 1

      # Merge 0 outdegree blocks to the output block.
      if block_spec.is_output:
        for j, (j_feat, j_connections) in enumerate(
            zip(net, num_outgoing_connections)):
          if j_connections == 0 and (j_feat.shape[2] == target_width and
                                     j_feat.shape[3] == x0.shape[3]):
            parents.append(j_feat)
            num_outgoing_connections[j] += 1

      # pylint: disable=g-direct-tensorflow-import
      if weighted_fusion:
        dtype = parents[0].dtype
        parent_weights = [
            tf.nn.relu(
                tf.cast(
                    tf.Variable(1.0, name='block{}_fusion{}'.format(i, j)),
                    dtype=dtype)) for j in range(len(parents))
        ]
        weights_sum = layers.Add()(parent_weights)
        parents = [
            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
            for i in range(len(parents))
        ]

      # Fuse all parent nodes then build a new block.
      x = tf_utils.get_activation(
          self._activation, use_keras_layer=True)(layers.Add()(parents))
      x = self._block_group(
          inputs=x,
          in_filters=target_num_filters,
          out_filters=target_num_filters,
          strides=1,
          se_ratio=self._se_ratio,
          expand_ratio=self._expand_ratio,
          block_repeats=self._block_repeats,
          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
              self._init_stochastic_depth_rate, i + 1,
              len(self._block_specs)),
          name='scale_permuted_block_{}'.format(i + 1))

      net.append(x)
      net_sizes.append(target_width)
      num_outgoing_connections.append(0)

      # Save output feats.
      if block_spec.is_output:
        if block_spec.level in endpoints:
          raise ValueError(
              'Duplicate feats found for output level {}.'.format(
                  block_spec.level))
        if (block_spec.level < self._min_level or
            block_spec.level > self._max_level):
          logging.warning(
              'SpineNet output level out of range '
              '[min_level, max_level] = [%s, %s]; it will not be used for '
              'further processing.', self._min_level, self._max_level)
        endpoints[str(block_spec.level)] = x

    return endpoints
  def _build_endpoints(self, net):
    """Matches filter size for endpoints before sharing conv layers."""
    endpoints = {}
    for level in range(self._min_level, self._max_level + 1):
      x = layers.Conv2D(
          filters=self._endpoints_num_filters,
          kernel_size=1,
          strides=1,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(net[str(level)])
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(x)
      x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
      endpoints[str(level)] = x
    return endpoints

  def _resample_with_sepconv(self, inputs, input_width, target_width,
                             target_num_filters):
    """Matches resolution and feature dimension."""
    x = inputs
    # Spatial resampling.
    if input_width > target_width:
      while input_width > target_width:
        x = layers.DepthwiseConv2D(
            kernel_size=3,
            strides=2,
            padding='SAME',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer)(x)
        x = self._norm(
            axis=self._bn_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon)(x)
        x = tf_utils.get_activation(
            self._activation, use_keras_layer=True)(x)
        input_width /= 2
    elif input_width < target_width:
      scale = target_width // input_width
      x = spatial_transform_ops.nearest_upsampling(
          x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)

    # Last 1x1 conv to match filter size.
    x = layers.Conv2D(
        filters=target_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(x)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(x)

    return x

  def get_config(self):
    config_dict = {
        'min_level': self._min_level,
        'max_level': self._max_level,
        'endpoints_num_filters': self._endpoints_num_filters,
        'se_ratio': self._se_ratio,
        'expand_ratio': self._expand_ratio,
        'block_repeats': self._block_repeats,
        'filter_size_scale': self._filter_size_scale,
        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_backbone_builder('spinenet_mobile')
def build_spinenet_mobile(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds Mobile SpineNet backbone from a config."""
  backbone_type = backbone_config.type
  backbone_cfg = backbone_config.get()
  assert backbone_type == 'spinenet_mobile', (f'Inconsistent backbone type '
                                              f'{backbone_type}')

  model_id = backbone_cfg.model_id
  if model_id not in SCALING_MAP:
    raise ValueError(
        'Mobile SpineNet-{} is not a valid architecture.'.format(model_id))
  scaling_params = SCALING_MAP[model_id]

  return SpineNetMobile(
      input_specs=input_specs,
      min_level=backbone_cfg.min_level,
      max_level=backbone_cfg.max_level,
      endpoints_num_filters=scaling_params['endpoints_num_filters'],
      block_repeats=scaling_params['block_repeats'],
      filter_size_scale=scaling_params['filter_size_scale'],
      se_ratio=backbone_cfg.se_ratio,
      expand_ratio=backbone_cfg.expand_ratio,
      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
      kernel_regularizer=l2_regularizer,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
official/vision/modeling/backbones/spinenet_mobile_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.backbones
import
spinenet_mobile
class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 0.6, 1, 0.0, 24),
      (128, 0.65, 1, 0.2, 40),
      (256, 1.0, 1, 0.2, 48),
  )
  def test_network_creation(self, input_size, filter_size_scale,
                            block_repeats, se_ratio, endpoints_num_filters):
    """Test creation of Mobile SpineNet models."""
    min_level = 3
    max_level = 7

    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size, input_size, 3])
    model = spinenet_mobile.SpineNetMobile(
        input_specs=input_specs,
        min_level=min_level,
        max_level=max_level,
        endpoints_num_filters=endpoints_num_filters,
        se_ratio=se_ratio,
        block_repeats=block_repeats,
        filter_size_scale=filter_size_scale,
        init_stochastic_depth_rate=0.2,
    )

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = model(inputs)

    for l in range(min_level, max_level + 1):
      self.assertIn(str(l), endpoints.keys())
      self.assertAllEqual(
          [1, input_size / 2 ** l, input_size / 2 ** l,
           endpoints_num_filters],
          endpoints[str(l)].shape.as_list())
  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        min_level=3,
        max_level=7,
        endpoints_num_filters=256,
        se_ratio=0.2,
        expand_ratio=6,
        block_repeats=1,
        filter_size_scale=1.0,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
        use_keras_upsampling_2d=False,
    )
    network = spinenet_mobile.SpineNetMobile(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = spinenet_mobile.SpineNetMobile.from_config(
        network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/backbones/spinenet_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import spinenet


class SpineNetTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 0.65, 1, 0.5, 128, 4, 6),
      (256, 1.0, 1, 0.5, 256, 3, 6),
      (384, 1.0, 2, 0.5, 256, 4, 7),
      (512, 1.0, 3, 1.0, 256, 3, 7),
      (640, 1.3, 4, 1.0, 384, 3, 7),
  )
  def test_network_creation(self, input_size, filter_size_scale,
                            block_repeats, resample_alpha,
                            endpoints_num_filters, min_level, max_level):
    """Test creation of SpineNet models."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size, input_size, 3])
    model = spinenet.SpineNet(
        input_specs=input_specs,
        min_level=min_level,
        max_level=max_level,
        endpoints_num_filters=endpoints_num_filters,
        resample_alpha=resample_alpha,
        block_repeats=block_repeats,
        filter_size_scale=filter_size_scale,
        init_stochastic_depth_rate=0.2,
    )

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = model(inputs)

    for l in range(min_level, max_level + 1):
      self.assertIn(str(l), endpoints.keys())
      self.assertAllEqual(
          [1, input_size / 2 ** l, input_size / 2 ** l,
           endpoints_num_filters],
          endpoints[str(l)].shape.as_list())

  @parameterized.parameters(
      ((128, 128), (128, 128)),
      ((128, 128), (256, 256)),
      ((640, 640), (896, 1664)),
  )
  def test_load_from_different_input_specs(self, input_size_1, input_size_2):
    """Test loading checkpoints with different input size."""

    def build_spinenet(input_size):
      tf.keras.backend.set_image_data_format('channels_last')
      input_specs = tf.keras.layers.InputSpec(
          shape=[None, input_size[0], input_size[1], 3])
      model = spinenet.SpineNet(
          input_specs=input_specs,
          min_level=3,
          max_level=7,
          endpoints_num_filters=384,
          resample_alpha=1.0,
          block_repeats=2,
          filter_size_scale=0.5)
      return model

    model_1 = build_spinenet(input_size_1)
    model_2 = build_spinenet(input_size_2)

    ckpt_1 = tf.train.Checkpoint(backbone=model_1)
    ckpt_2 = tf.train.Checkpoint(backbone=model_2)

    ckpt_path = self.get_temp_dir() + '/ckpt'
    ckpt_1.write(ckpt_path)
    ckpt_2.restore(ckpt_path).expect_partial()

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        min_level=3,
        max_level=7,
        endpoints_num_filters=256,
        resample_alpha=0.5,
        block_repeats=1,
        filter_size_scale=1.0,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = spinenet.SpineNet(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = spinenet.SpineNet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/classification_model.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build classification models."""
from typing import Any, Mapping, Optional

# Import libraries
import tensorflow as tf

layers = tf.keras.layers


@tf.keras.utils.register_keras_serializable(package='Vision')
class ClassificationModel(tf.keras.Model):
  """A classification class builder."""

  def __init__(
      self,
      backbone: tf.keras.Model,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
          shape=[None, None, None, 3]),
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'random_uniform',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      add_head_batch_norm: bool = False,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      skip_logits_layer: bool = False,
      **kwargs):
    """Classification initialization function.

    Args:
      backbone: a backbone network.
      num_classes: `int` number of classes in classification task.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      dropout_rate: `float` rate for dropout regularization.
      kernel_initializer: kernel initializer for the dense layer.
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Defaults
        to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object. Defaults to
        None.
      add_head_batch_norm: `bool` whether to add a batch normalization layer
        before pool.
      use_sync_bn: `bool` if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing
        by zero.
      skip_logits_layer: `bool`, whether to skip the prediction layer.
      **kwargs: keyword arguments to be passed.
    """
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    inputs = tf.keras.Input(shape=input_specs.shape[1:],
                            name=input_specs.name)
    endpoints = backbone(inputs)
    x = endpoints[max(endpoints.keys())]

    if add_head_batch_norm:
      x = norm(axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)

    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    if not skip_logits_layer:
      x = tf.keras.layers.Dropout(dropout_rate)(x)
      x = tf.keras.layers.Dense(
          num_classes,
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(x)

    super(ClassificationModel, self).__init__(
        inputs=inputs, outputs=x, **kwargs)
    self._config_dict = {
        'backbone': backbone,
        'num_classes': num_classes,
        'input_specs': input_specs,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'add_head_batch_norm': add_head_batch_norm,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
    }
    self._input_specs = input_specs
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._backbone = backbone
    self._norm = norm

  @property
  def checkpoint_items(self) -> Mapping[str, tf.keras.Model]:
    """Returns a dictionary of items to be additionally checkpointed."""
    return dict(backbone=self.backbone)

  @property
  def backbone(self) -> tf.keras.Model:
    return self._backbone

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
official/vision/modeling/classification_model_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for classification network."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.modeling import backbones
from official.vision.modeling import classification_model


class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 50, 'relu'),
      (128, 50, 'relu'),
      (128, 50, 'swish'),
  )
  def test_resnet_network_creation(self, input_size, resnet_model_id,
                                   activation):
    """Test for creation of a ResNet-50 classifier."""
    inputs = np.random.rand(2, input_size, input_size, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet(
        model_id=resnet_model_id, activation=activation)
    self.assertEqual(backbone.count_params(), 23561152)

    num_classes = 1000
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
    )
    self.assertEqual(model.count_params(), 25610152)

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  def test_revnet_network_creation(self):
    """Test for creation of a RevNet-56 classifier."""
    revnet_model_id = 56
    inputs = np.random.rand(2, 224, 224, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.RevNet(model_id=revnet_model_id)
    self.assertEqual(backbone.count_params(), 19473792)

    num_classes = 1000
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
        add_head_batch_norm=True,
    )
    self.assertEqual(model.count_params(), 22816104)

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  @combinations.generate(
      combinations.combine(
          mobilenet_model_id=[
              'MobileNetV1',
              'MobileNetV2',
              'MobileNetV3Large',
              'MobileNetV3Small',
              'MobileNetV3EdgeTPU',
              'MobileNetMultiAVG',
              'MobileNetMultiMAX',
          ],
          filter_size_scale=[1.0, 0.75],
      ))
  def test_mobilenet_network_creation(self, mobilenet_model_id,
                                      filter_size_scale):
    """Test for creation of a MobileNet classifier."""
    inputs = np.random.rand(2, 224, 224, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.MobileNet(
        model_id=mobilenet_model_id, filter_size_scale=filter_size_scale)

    num_classes = 1001
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
    )

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.cloud_tpu_strategy,
              strategy_combinations.one_device_strategy_gpu,
          ],
          use_sync_bn=[False, True],
      ))
  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
    """Test for sync bn on TPU and GPU devices."""
    inputs = np.random.rand(64, 128, 128, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    with strategy.scope():
      backbone = backbones.ResNet(model_id=50, use_sync_bn=use_sync_bn)

      model = classification_model.ClassificationModel(
          backbone=backbone,
          num_classes=1000,
          dropout_rate=0.2,
      )
      _ = model(inputs)

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.one_device_strategy_gpu,
          ],
          data_format=['channels_last', 'channels_first'],
          input_dim=[1, 3, 4]))
  def test_data_format_gpu(self, strategy, data_format, input_dim):
    """Test for different data formats on GPU devices."""
    if data_format == 'channels_last':
      inputs = np.random.rand(2, 128, 128, input_dim)
    else:
      inputs = np.random.rand(2, input_dim, 128, 128)
    input_specs = tf.keras.layers.InputSpec(shape=inputs.shape)

    tf.keras.backend.set_image_data_format(data_format)

    with strategy.scope():
      backbone = backbones.ResNet(model_id=50, input_specs=input_specs)

      model = classification_model.ClassificationModel(
          backbone=backbone,
          num_classes=1000,
          input_specs=input_specs,
      )
      _ = model(inputs)

  def test_serialize_deserialize(self):
    """Validate the classification net can be serialized and deserialized."""
    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet(model_id=50)
    model = classification_model.ClassificationModel(
        backbone=backbone, num_classes=1000)

    config = model.get_config()
    new_model = classification_model.ClassificationModel.from_config(config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/__init__.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Decoders package definition."""
from official.vision.modeling.decoders.aspp import ASPP
from official.vision.modeling.decoders.fpn import FPN
from official.vision.modeling.decoders.nasfpn import NASFPN
official/vision/modeling/decoders/aspp.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""
from typing import Any, List, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.modeling import hyperparams
from official.vision.modeling.decoders import factory
from official.vision.modeling.layers import deeplab
from official.vision.modeling.layers import nn_layers

TensorMapUnion = Union[tf.Tensor, Mapping[str, tf.Tensor]]


@tf.keras.utils.register_keras_serializable(package='Vision')
class ASPP(tf.keras.layers.Layer):
  """Creates an Atrous Spatial Pyramid Pooling (ASPP) layer."""

  def __init__(
      self,
      level: int,
      dilation_rates: List[int],
      num_filters: int = 256,
      pool_kernel_size: Optional[int] = None,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      activation: str = 'relu',
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      use_depthwise_convolution: bool = False,
      spp_layer_version: str = 'v1',
      output_tensor: bool = False,
      **kwargs):
    """Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.

    Args:
      level: An `int` level to apply ASPP.
      dilation_rates: A `list` of dilation rates.
      num_filters: An `int` number of output filters in ASPP.
      pool_kernel_size: A `list` of [height, width] of pooling kernel size or
        None. Pooling size is with respect to original image size; it will be
        scaled down by 2**level. If None, global average pooling is used.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      activation: A `str` activation to be used in ASPP.
      dropout_rate: A `float` rate for dropout regularization.
      kernel_initializer: A `str` name of kernel_initializer for
        convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      interpolation: A `str` of interpolation method. It should be one of
        `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
        `gaussian`, or `mitchellcubic`.
      use_depthwise_convolution: If True, depthwise separable convolutions
        will be added to the atrous spatial pyramid pooling.
      spp_layer_version: A `str` of spatial pyramid pooling layer version.
      output_tensor: Whether to output a single tensor or a dictionary of
        tensors. Defaults to False.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._config_dict = {
        'level': level,
        'dilation_rates': dilation_rates,
        'num_filters': num_filters,
        'pool_kernel_size': pool_kernel_size,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'activation': activation,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'interpolation': interpolation,
        'use_depthwise_convolution': use_depthwise_convolution,
        'spp_layer_version': spp_layer_version,
        'output_tensor': output_tensor
    }
    self._aspp_layer = deeplab.SpatialPyramidPooling if self._config_dict[
        'spp_layer_version'] == 'v1' else nn_layers.SpatialPyramidPooling

  def build(self, input_shape):
    pool_kernel_size = None
    if self._config_dict['pool_kernel_size']:
      pool_kernel_size = [
          int(p_size // 2 ** self._config_dict['level'])
          for p_size in self._config_dict['pool_kernel_size']
      ]

    self.aspp = self._aspp_layer(
        output_channels=self._config_dict['num_filters'],
        dilation_rates=self._config_dict['dilation_rates'],
        pool_kernel_size=pool_kernel_size,
        use_sync_bn=self._config_dict['use_sync_bn'],
        batchnorm_momentum=self._config_dict['norm_momentum'],
        batchnorm_epsilon=self._config_dict['norm_epsilon'],
        activation=self._config_dict['activation'],
        dropout=self._config_dict['dropout_rate'],
        kernel_initializer=self._config_dict['kernel_initializer'],
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        interpolation=self._config_dict['interpolation'],
        use_depthwise_convolution=self._config_dict[
            'use_depthwise_convolution'])
  def call(self, inputs: TensorMapUnion) -> TensorMapUnion:
    """Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.

    If output_tensor is False, the output of ASPP will be a dict of
    {`level`, `tf.Tensor`} even if only one level is present, so it stays
    compatible with the rest of the segmentation model interfaces.
    If output_tensor is True, a single tensor is output.

    Args:
      inputs: A `tf.Tensor` of shape [batch, height_l, width_l, filter_size]
        or a `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of shape [batch, height_l, width_l,
          filter_size].

    Returns:
      A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or a
      `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of output of ASPP module.
    """
    outputs = {}
    level = str(self._config_dict['level'])
    backbone_output = inputs[level] if isinstance(inputs, dict) else inputs
    outputs = self.aspp(backbone_output)
    return outputs if self._config_dict['output_tensor'] else {level: outputs}

  def get_config(self) -> Mapping[str, Any]:
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)


@factory.register_decoder_builder('aspp')
def build_aspp_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds ASPP decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone. Note this is for a consistent
      interface, and is not used by the ASPP decoder.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Defaults
      to None.

  Returns:
    A `tf.keras.Model` instance of the ASPP decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `aspp`.
  """
  del input_specs  # input_specs is not used by ASPP decoder.
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'aspp':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `aspp`.')

  norm_activation_config = model_config.norm_activation
  return ASPP(
      level=decoder_cfg.level,
      dilation_rates=decoder_cfg.dilation_rates,
      num_filters=decoder_cfg.num_filters,
      use_depthwise_convolution=decoder_cfg.use_depthwise_convolution,
      pool_kernel_size=decoder_cfg.pool_kernel_size,
      dropout_rate=decoder_cfg.dropout_rate,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      activation=norm_activation_config.activation,
      kernel_regularizer=l2_regularizer,
      spp_layer_version=decoder_cfg.spp_layer_version,
      output_tensor=decoder_cfg.output_tensor)
official/vision/modeling/decoders/aspp_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for aspp."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import aspp


class ASPPTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (3, [6, 12, 18, 24], 128, 'v1'),
      (3, [6, 12, 18], 128, 'v1'),
      (3, [6, 12], 256, 'v1'),
      (4, [6, 12, 18, 24], 128, 'v2'),
      (4, [6, 12, 18], 128, 'v2'),
      (4, [6, 12], 256, 'v2'),
  )
  def test_network_creation(self, level, dilation_rates, num_filters,
                            spp_layer_version):
    """Test creation of ASPP."""
    input_size = 256
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    backbone = resnet.ResNet(model_id=50)
    network = aspp.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        spp_layer_version=spp_layer_version)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    self.assertIn(str(level), feats)
    self.assertAllEqual(
        [1, input_size // 2 ** level, input_size // 2 ** level, num_filters],
        feats[str(level)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        level=3,
        dilation_rates=[6, 12],
        num_filters=256,
        pool_kernel_size=None,
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        activation='relu',
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        interpolation='bilinear',
        dropout_rate=0.2,
        use_depthwise_convolution=False,
        spp_layer_version='v1',
        output_tensor=False,
        dtype='float32',
        name='aspp',
        trainable=True)
    network = aspp.ASPP(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = aspp.ASPP.from_config(network.get_config())

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/factory.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoder registers and factory method.
One can register a new decoder model by the following two steps:
1 Import the factory and register the build in the decoder file.
2 Import the decoder class and add a build in __init__.py.
```
# my_decoder.py
from modeling.decoders import factory
class MyDecoder():
...
@factory.register_decoder_builder('my_decoder')
def build_my_decoder():
return MyDecoder()
# decoders/__init__.py adds import
from modeling.decoders.my_decoder import MyDecoder
```
If one wants the MyDecoder class to be used only by those binary
then don't imported the decoder module in decoders/__init__.py, but import it
in place that uses it.
"""
from typing import Any, Callable, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.core import registry
from official.modeling import hyperparams

_REGISTERED_DECODER_CLS = {}


def register_decoder_builder(key: str) -> Callable[..., Any]:
  """Decorates a builder of decoder class.

  The builder should be a Callable (a class or a function).
  This decorator supports registration of decoder builders as follows:

  ```
  class MyDecoder(tf.keras.Model):
    pass

  @register_decoder_builder('mydecoder')
  def builder(input_specs, config, l2_reg):
    return MyDecoder(...)

  # Builds a MyDecoder object.
  my_decoder = build_decoder(input_specs, config, l2_reg)
  ```

  Args:
    key: A `str` of key to look up the builder.

  Returns:
    A callable for use as a class decorator that registers the decorated
    class for creation from an instance of task_config_cls.
  """
  return registry.register(_REGISTERED_DECODER_CLS, key)


@register_decoder_builder('identity')
def build_identity(
    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
    model_config: Optional[hyperparams.Config] = None,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> None:
  """Builds identity decoder from a config.

  The input arguments are not used by the identity decoder but are kept here
  to ensure the interface is consistent.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Defaults to
      None.

  Returns:
    An instance of the identity decoder, i.e. None.
  """
  del input_specs, model_config, l2_regularizer  # Unused by identity decoder.


def build_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None,
    **kwargs
) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds decoder from a config.

  A decoder can be a keras.Model, a keras.layers.Layer, or None. If it is not
  None, the decoder will take features from the backbone as input and
  generate decoded feature maps. If it is None, such as an identity decoder,
  the decoder is skipped and features from the backbone are regarded as model
  output.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Defaults to
      None.
    **kwargs: Additional keyword args to be passed to the decoder builder.

  Returns:
    An instance of the decoder.
  """
  decoder_builder = registry.lookup(_REGISTERED_DECODER_CLS,
                                    model_config.decoder.type)

  return decoder_builder(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer,
      **kwargs)
official/vision/modeling/decoders/factory_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.distribute import combinations
from official.vision import configs
from official.vision.configs import decoders as decoders_cfg
from official.vision.modeling import decoders
from official.vision.modeling.decoders import factory


class FactoryTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          use_separable_conv=[True, False]))
  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
    """Test creation of FPN decoder."""
    min_level = 3
    max_level = 7
    input_specs = {}
    for level in range(min_level, max_level):
      input_specs[str(level)] = tf.TensorShape(
          [1, 128 // (2**level), 128 // (2**level), 3])

    network = decoders.FPN(
        input_specs=input_specs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='fpn',
        fpn=decoders_cfg.FPN(
            num_filters=num_filters, use_separable_conv=use_separable_conv))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          num_repeats=[3, 5],
          use_separable_conv=[True, False]))
  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                   use_separable_conv):
    """Test creation of NASFPN decoder."""
    min_level = 3
    max_level = 7
    input_specs = {}
    for level in range(min_level, max_level):
      input_specs[str(level)] = tf.TensorShape(
          [1, 128 // (2**level), 128 // (2**level), 3])

    network = decoders.NASFPN(
        input_specs=input_specs,
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='nasfpn',
        nasfpn=decoders_cfg.NASFPN(
            num_filters=num_filters,
            num_repeats=num_repeats,
            use_separable_conv=use_separable_conv))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          level=[3, 4],
          dilation_rates=[[6, 12, 18], [6, 12]],
          num_filters=[128, 256]))
  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
    """Test creation of ASPP decoder."""
    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

    network = decoders.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        use_sync_bn=True)

    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='aspp',
        aspp=decoders_cfg.ASPP(
            level=level,
            dilation_rates=dilation_rates,
            num_filters=num_filters))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    # Due to calling `super().get_config()` in the ASPP layer, everything but
    # the names of the two layer instances is the same, so we force the names
    # to be equal to avoid a false alarm.
    factory_network_config['name'] = network_config['name']

    self.assertEqual(network_config, factory_network_config)

  def test_identity_decoder_creation(self):
    """Test creation of identity decoder."""
    model_config = configs.retinanet.RetinaNet()
    model_config.num_classes = 2
    model_config.input_size = [None, None, 3]

    model_config.decoder = decoders_cfg.Decoder(
        type='identity', identity=decoders_cfg.Identity())

    factory_network = factory.build_decoder(
        input_specs=None, model_config=model_config)

    self.assertIsNone(factory_network)


if __name__ == '__main__':
  tf.test.main()
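
Reviewer note: for orientation, a minimal sketch of driving the same factory
entry point outside a test. It assumes only the config fields exercised in the
tests above; the input shapes are illustrative, not required values.

import tensorflow as tf
from official.vision import configs
from official.vision.configs import decoders as decoders_cfg
from official.vision.modeling.decoders import factory

# Any {level: TensorShape} mapping from a backbone's `output_specs` works.
input_specs = {
    str(level): tf.TensorShape([1, 128 // 2**level, 128 // 2**level, 3])
    for level in range(3, 7)
}

model_config = configs.retinanet.RetinaNet()
model_config.min_level = 3
model_config.max_level = 7
model_config.input_size = [None, None, 3]
model_config.decoder = decoders_cfg.Decoder(
    type='fpn', fpn=decoders_cfg.FPN(num_filters=256))

decoder = factory.build_decoder(
    input_specs=input_specs, model_config=model_config)
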
official/vision/modeling/decoders/fpn.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Feature Pyramid Networks (FPN)."""
from typing import Any, Mapping, Optional

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops


@tf.keras.utils.register_keras_serializable(package='Vision')
class FPN(tf.keras.Model):
  """Creates a Feature Pyramid Network (FPN).

  This implements the paper:
  Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
  and Serge Belongie.
  Feature Pyramid Networks for Object Detection.
  (https://arxiv.org/pdf/1612.03144)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      num_filters: int = 256,
      fusion_type: str = 'sum',
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Feature Pyramid Network (FPN).

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      num_filters: An `int` number of filters in FPN layers.
      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
        concat for feature fusion.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'fusion_type': fusion_type,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if use_separable_conv:
      conv2d = tf.keras.layers.SeparableConv2D
    else:
      conv2d = tf.keras.layers.Conv2D
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    activation_fn = tf.keras.layers.Activation(
        tf_utils.get_activation(activation))

    # Build input feature pyramid.
    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Get input feature pyramid from backbone.
    logging.info('FPN input_specs: %s', input_specs)
    inputs = self._build_input_pyramid(input_specs, min_level)
    backbone_max_level = min(int(max(inputs.keys())), max_level)

    # Build lateral connections.
    feats_lateral = {}
    for level in range(min_level, backbone_max_level + 1):
      feats_lateral[str(level)] = conv2d(
          filters=num_filters,
          kernel_size=1,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              inputs[str(level)])

    # Build top-down path.
    feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
    for level in range(backbone_max_level - 1, min_level - 1, -1):
      feat_a = spatial_transform_ops.nearest_upsampling(
          feats[str(level + 1)], 2)
      feat_b = feats_lateral[str(level)]

      if fusion_type == 'sum':
        feats[str(level)] = feat_a + feat_b
      elif fusion_type == 'concat':
        feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
      else:
        raise ValueError('Fusion type {} not supported.'.format(fusion_type))

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build post-hoc 3x3 convolution kernel.
    for level in range(min_level, backbone_max_level + 1):
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=1,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              feats[str(level)])

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build coarser FPN levels introduced for RetinaNet.
    for level in range(backbone_max_level + 1, max_level + 1):
      feats_in = feats[str(level - 1)]
      if level > backbone_max_level + 1:
        feats_in = activation_fn(feats_in)
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=2,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              feats_in)

    # Apply batch norm layers.
    for level in range(min_level, max_level + 1):
      feats[str(level)] = norm(
          axis=bn_axis,
          momentum=norm_momentum,
          epsilon=norm_epsilon)(
              feats[str(level)])

    self._output_specs = {
        str(level): feats[str(level)].get_shape()
        for level in range(min_level, max_level + 1)
    }

    super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    assert isinstance(input_specs, dict)
    if min(input_specs.keys()) > str(min_level):
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
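
Reviewer note: before the registered builder below, a hedged sketch of direct
construction. ResNet-50 is an arbitrary example backbone; any backbone in this
repository that exposes `output_specs` should wire up the same way.

import tensorflow as tf
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn

backbone = resnet.ResNet(model_id=50)
decoder = fpn.FPN(
    input_specs=backbone.output_specs, min_level=3, max_level=7)

images = tf.keras.Input(shape=(256, 256, 3), batch_size=1)
endpoints = backbone(images)
# A dict {'3': ..., ..., '7': ...}; `decoder.output_specs` gives the shapes.
feats = decoder(endpoints)
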
@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds FPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the FPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `fpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'fpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `fpn`.')
  norm_activation_config = model_config.norm_activation
  return FPN(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      fusion_type=decoder_cfg.fusion_type,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
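
Reviewer note: `register_decoder_builder` is what connects the factory test at
the top of this commit to this module. A hedged sketch of registering a custom
decoder under a new key; the key `my_decoder` and the builder body are invented
purely for illustration.

from typing import Mapping, Optional
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.modeling.decoders import factory


@factory.register_decoder_builder('my_decoder')  # 'my_decoder' is invented.
def build_my_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> Optional[tf.keras.Model]:
  """Toy builder; a real one would read `model_config.decoder.get()`."""
  del model_config, l2_regularizer  # Unused in this sketch.
  inputs = {k: tf.keras.Input(shape=v[1:]) for k, v in input_specs.items()}
  return tf.keras.Model(inputs=inputs, outputs=inputs)  # Pass-through decoder.
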
official/vision/modeling/decoders/fpn_test.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import mobilenet
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn


class FPNTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (256, 3, 7, False, 'sum'),
      (256, 3, 7, True, 'concat'),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv, fusion_type):
    """Test creation of FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = resnet.ResNet(model_id=50)
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        fusion_type=fusion_type,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation_with_mobilenet(self, input_size, min_level,
                                           max_level, use_separable_conv):
    """Test creation of FPN with mobilenet backbone."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=resnet.ResNet(model_id=50).output_specs,
        min_level=3,
        max_level=7,
        num_filters=256,
        fusion_type='sum',
        use_separable_conv=False,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = fpn.FPN(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = fpn.FPN.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
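
Reviewer note: the `register_keras_serializable(package='Vision')` decorator on
`FPN` is what makes the round trip above work; a condensed, hedged version of
the same check.

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn

network = fpn.FPN(input_specs=resnet.ResNet(model_id=50).output_specs)
_ = network.to_json()  # JSON-serializable via the 'Vision>FPN' registration.
restored = fpn.FPN.from_config(network.get_config())
assert network.get_config() == restored.get_config()
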
official/vision/modeling/decoders/nasfpn.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, List, Mapping, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops

# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, combine_fn, (input_offset0, input_offset1), is_output).
NASFPN_BLOCK_SPECS = [
    (4, 'attention', (1, 3), False),
    (4, 'sum', (1, 5), False),
    (3, 'sum', (0, 6), True),
    (4, 'sum', (6, 7), True),
    (5, 'attention', (7, 8), True),
    (7, 'attention', (6, 9), True),
    (6, 'attention', (9, 10), True),
]


class BlockSpec():
  """A container class that specifies the block configuration for NAS-FPN."""

  def __init__(self, level: int, combine_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level = level
    self.combine_fn = combine_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for NAS-FPN."""
  if not block_specs:
    block_specs = NASFPN_BLOCK_SPECS
  logging.info('Building NAS-FPN block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]
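
Reviewer note: a hedged reading of the spec format above. The two-block
topology below is invented for illustration; offsets index into the running
feature list, which starts as [min_level, ..., max_level] and grows by one
entry per block.

from official.vision.modeling.decoders import nasfpn

# Default architecture: e.g. (4, 'attention', (1, 3), False) builds a level-4
# node by attention-combining the features at offsets 1 and 3 (not an output).
default_specs = nasfpn.build_block_specs()

# Invented topology for min_level=3, max_level=4 (initial offsets 0 and 1):
custom_specs = nasfpn.build_block_specs([
    (3, 'sum', (0, 1), True),        # appended at offset 2: level-3 output
    (4, 'attention', (1, 2), True),  # appended at offset 3: level-4 output
])
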
@tf.keras.utils.register_keras_serializable(package='Vision')
class NASFPN(tf.keras.Model):
  """Creates a NAS-FPN model.

  This implements the paper:
  Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
  NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object
  Detection.
  (https://arxiv.org/abs/1904.07392)
  """

  def __init__(self,
               input_specs: Mapping[str, tf.TensorShape],
               min_level: int = 3,
               max_level: int = 7,
               block_specs: List[BlockSpec] = build_block_specs(),
               num_filters: int = 256,
               num_repeats: int = 5,
               use_separable_conv: bool = False,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_initializer: str = 'VarianceScaling',
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               **kwargs):
    """Initializes a NAS-FPN model.

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      block_specs: a list of BlockSpec objects that specifies the NAS-FPN
        network topology. By default, the previously discovered architecture
        is used.
      num_filters: An `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'num_repeats': num_repeats,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._num_repeats = num_repeats
    self._conv_op = (tf.keras.layers.SeparableConv2D
                     if self._config_dict['use_separable_conv']
                     else tf.keras.layers.Conv2D)
    if self._config_dict['use_separable_conv']:
      self._conv_kwargs = {
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    else:
      self._conv_kwargs = {
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization
                     if self._config_dict['use_sync_bn']
                     else tf.keras.layers.BatchNormalization)
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._norm_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    self._activation = tf_utils.get_activation(activation)

    # Gets input feature pyramid from backbone.
    inputs = self._build_input_pyramid(input_specs, min_level)

    # Projects the input features.
    feats = []
    for level in range(self._min_level, self._max_level + 1):
      if str(level) in inputs.keys():
        feats.append(self._resample_feature_map(
            inputs[str(level)], level, level,
            self._config_dict['num_filters']))
      else:
        feats.append(self._resample_feature_map(
            feats[-1], level - 1, level, self._config_dict['num_filters']))

    # Repeatedly builds the NAS-FPN modules.
    for _ in range(self._num_repeats):
      output_feats = self._build_feature_pyramid(feats)
      feats = [output_feats[level]
               for level in range(self._min_level, self._max_level + 1)]

    self._output_specs = {
        str(level): output_feats[level].get_shape()
        for level in range(min_level, max_level + 1)
    }
    output_feats = {str(level): output_feats[level]
                    for level in output_feats.keys()}
    super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    assert isinstance(input_specs, dict)
    if min(input_specs.keys()) > str(min_level):
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def _resample_feature_map(self,
                            inputs,
                            input_level,
                            target_level,
                            target_num_filters=256):
    x = inputs
    _, _, _, input_num_filters = x.get_shape().as_list()
    if input_num_filters != target_num_filters:
      x = self._conv_op(
          filters=target_num_filters,
          kernel_size=1,
          padding='same',
          **self._conv_kwargs)(x)
      x = self._norm_op(**self._norm_kwargs)(x)

    if input_level < target_level:
      stride = int(2 ** (target_level - input_level))
      return tf.keras.layers.MaxPool2D(
          pool_size=stride, strides=stride, padding='same')(x)
    if input_level > target_level:
      scale = int(2 ** (input_level - target_level))
      return spatial_transform_ops.nearest_upsampling(x, scale=scale)

    # Force output x to be the same dtype as mixed precision policy. This
    # avoids dtype mismatch when one input (by default float32 dtype) does not
    # meet all the above conditions and is output unchanged, while other
    # inputs are processed to have different dtype, e.g., using bfloat16 on
    # TPU.
    compute_dtype = tf.keras.layers.Layer().dtype_policy.compute_dtype
    if (compute_dtype is not None) and (x.dtype != compute_dtype):
      return tf.cast(x, dtype=compute_dtype)
    else:
      return x

  def _global_attention(self, feat0, feat1):
    m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True)
    m = tf.math.sigmoid(m)
    return feat0 + feat1 * m

  def _build_feature_pyramid(self, feats):
    num_output_connections = [0] * len(feats)
    num_output_levels = self._max_level - self._min_level + 1
    feat_levels = list(range(self._min_level, self._max_level + 1))

    for i, block_spec in enumerate(self._block_specs):
      new_level = block_spec.level

      # Checks the range of input_offsets.
      for input_offset in block_spec.input_offsets:
        if input_offset >= len(feats):
          raise ValueError(
              'input_offset ({}) is larger than num feats({})'.format(
                  input_offset, len(feats)))
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      # Update graph with inputs.
      node0 = feats[input0]
      node0_level = feat_levels[input0]
      num_output_connections[input0] += 1
      node0 = self._resample_feature_map(node0, node0_level, new_level)
      node1 = feats[input1]
      node1_level = feat_levels[input1]
      num_output_connections[input1] += 1
      node1 = self._resample_feature_map(node1, node1_level, new_level)

      # Combine node0 and node1 to create new feat.
      if block_spec.combine_fn == 'sum':
        new_node = node0 + node1
      elif block_spec.combine_fn == 'attention':
        if node0_level >= node1_level:
          new_node = self._global_attention(node0, node1)
        else:
          new_node = self._global_attention(node1, node0)
      else:
        raise ValueError('unknown combine_fn `{}`.'.format(
            block_spec.combine_fn))

      # Add intermediate nodes that do not have any connections to output.
      if block_spec.is_output:
        for j, (feat, feat_level, num_output) in enumerate(
            zip(feats, feat_levels, num_output_connections)):
          if num_output == 0 and feat_level == new_level:
            num_output_connections[j] += 1

            feat_ = self._resample_feature_map(feat, feat_level, new_level)
            new_node += feat_

      new_node = self._activation(new_node)
      new_node = self._conv_op(
          filters=self._config_dict['num_filters'],
          kernel_size=(3, 3),
          padding='same',
          **self._conv_kwargs)(new_node)
      new_node = self._norm_op(**self._norm_kwargs)(new_node)
      feats.append(new_node)
      feat_levels.append(new_level)
      num_output_connections.append(0)

    output_feats = {}
    for i in range(len(feats) - num_output_levels, len(feats)):
      level = feat_levels[i]
      output_feats[level] = feats[i]
    logging.info('Output feature pyramid: %s', output_feats)
    return output_feats

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds NASFPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the NASFPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `nasfpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `nasfpn`.')
  norm_activation_config = model_config.norm_activation
  return NASFPN(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      num_repeats=decoder_cfg.num_repeats,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
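
Reviewer note: the `attention` combine is the main departure from plain
summation, so here is a self-contained sketch of what `_global_attention`
computes on dummy tensors. Shapes are illustrative; in the model both inputs
have already been resampled to the target level.

import tensorflow as tf

feat0 = tf.random.normal([1, 16, 16, 256])  # feature from the higher level
feat1 = tf.random.normal([1, 16, 16, 256])  # feature from the lower level

# Gate feat1 by a sigmoid of feat0's global max pool, then add:
m = tf.math.sigmoid(tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True))
fused = feat0 + feat1 * m  # same result as NASFPN._global_attention(feat0, feat1)
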
official/vision/modeling/decoders/nasfpn_test.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for NAS-FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import nasfpn


class NASFPNTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv):
    """Test creation of NAS-FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    num_filters = 256
    backbone = resnet.ResNet(model_id=50)
    network = nasfpn.NASFPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, num_filters],
          feats[str(level)].shape.as_list())


if __name__ == '__main__':
  tf.test.main()