Removes unneeded content of the beta folder.

PiperOrigin-RevId: 437276665

Removes unneeded content of the beta folder.
PiperOrigin-RevId: 437276665
e4be7e00 · Yeqing Li · A. Unique TensorFlower · f47405b5 · f47405b5 · f47405b5
Commit e4be7e00 authored Mar 25, 2022 by Yeqing Li Committed by A. Unique TensorFlower Mar 25, 2022
20 changed files
--- a/official/vision/beta/modeling/decoders/factory.py
+++ b/official/vision/beta/modeling/decoders/factory.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Decoder registers and factory method.
-
-One can register a new decoder model by the following two steps:
-
-1 Import the factory and register the build in the decoder file.
-2 Import the decoder class and add a build in __init__.py.
-
-```
-# my_decoder.py
-
-from modeling.decoders import factory
-
-class MyDecoder():
-  ...
-
-@factory.register_decoder_builder('my_decoder')
-def build_my_decoder():
-  return MyDecoder()
-
-# decoders/__init__.py adds import
-from modeling.decoders.my_decoder import MyDecoder
-```
-
-If one wants the MyDecoder class to be used only by specific binaries,
-then do not import the decoder module in decoders/__init__.py, but import it
-in the place that uses it.
-"""
-from typing import Any, Callable, Mapping, Optional, Union
-
-# Import libraries
-
-import tensorflow as tf
-
-from official.core import registry
-from official.modeling import hyperparams
-
-# Registry of decoder builders. register_decoder_builder() hands this dict to
-# registry.register(), and build_decoder() hands it to registry.lookup().
-_REGISTERED_DECODER_CLS = {}
-
-
-def register_decoder_builder(key: str) -> Callable[..., Any]:
-  """Returns a decorator that registers a decoder builder under `key`.
-
-  The decorated builder should be a Callable (a class or a function).
-  Typical usage:
-
-  ```
-  class MyDecoder(tf.keras.Model):
-    pass
-
-  @register_decoder_builder('mydecoder')
-  def builder(input_specs, model_config, l2_regularizer):
-    return MyDecoder(...)
-
-  # Builds a MyDecoder object when model_config.decoder.type is 'mydecoder'.
-  my_decoder = build_decoder(input_specs, model_config, l2_regularizer)
-  ```
-
-  Args:
-    key: A `str` of key to look up the builder.
-
-  Returns:
-    The callable produced by `registry.register` for `key`, intended to be
-    applied as a decorator on the builder.
-  """
-  decorator = registry.register(_REGISTERED_DECODER_CLS, key)
-  return decorator
-
-
-@register_decoder_builder('identity')
-def build_identity(
-    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
-    model_config: Optional[hyperparams.Config] = None,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None) -> None:
-  """Builds the identity decoder, which is represented by `None`.
-
-  None of the arguments are used; they are accepted only so that the signature
-  is consistent with the other registered decoder builders.
-
-  Args:
-    input_specs: A `dict` of input specifications. A dictionary consists of
-      {level: TensorShape} from a backbone.
-    model_config: A `OneOfConfig` of model config.
-    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
-      None.
-
-  Returns:
-    Always None.
-  """
-  # Nothing to build: deliberately discard every argument.
-  del input_specs, model_config, l2_regularizer
-  return None
-
-
-def build_decoder(
-    input_specs: Mapping[str, tf.TensorShape],
-    model_config: hyperparams.Config,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None,
-    **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:  # pytype: disable=annotation-type-mismatch  # typed-keras
-  """Looks up the registered decoder builder and invokes it.
-
-  The builder is selected by `model_config.decoder.type`. Its result can be a
-  keras.Model, a keras.layers.Layer, or None. When it is not None, the decoder
-  takes features from the backbone as input and generates decoded feature
-  maps. When it is None (e.g. the identity decoder), the decoder is skipped
-  and features from the backbone are regarded as model output.
-
-  Args:
-    input_specs: A `dict` of input specifications. A dictionary consists of
-      {level: TensorShape} from a backbone.
-    model_config: A `OneOfConfig` of model config.
-    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
-      None.
-    **kwargs: Additional keyword args to be passed to decoder builder.
-
-  Returns:
-    Whatever the selected builder returns: a decoder instance or None.
-  """
-  decoder_type = model_config.decoder.type
-  build_fn = registry.lookup(_REGISTERED_DECODER_CLS, decoder_type)
-
-  # Every builder is called with the same three keyword arguments, plus any
-  # extras supplied by the caller.
-  builder_args = dict(
-      input_specs=input_specs,
-      model_config=model_config,
-      l2_regularizer=l2_regularizer)
-  return build_fn(**builder_args, **kwargs)
--- a/official/vision/beta/modeling/decoders/factory_test.py
+++ b/official/vision/beta/modeling/decoders/factory_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for decoder factory functions."""
-
-from absl.testing import parameterized
-import tensorflow as tf
-
-from tensorflow.python.distribute import combinations
-from official.vision.beta import configs
-from official.vision.beta.configs import decoders as decoders_cfg
-from official.vision.beta.modeling import decoders
-from official.vision.beta.modeling.decoders import factory
-
-
-class FactoryTest(tf.test.TestCase, parameterized.TestCase):
-  """Verifies factory-built decoders against directly constructed ones."""
-
-  @combinations.generate(
-      combinations.combine(
-          num_filters=[128, 256], use_separable_conv=[True, False]))
-  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
-    """The factory-built FPN has the same config as a hand-built FPN."""
-    min_level, max_level = 3, 7
-    # range() stops before max_level, so specs are created for levels 3..6.
-    input_specs = {
-        str(level): tf.TensorShape(
-            [1, 128 // (2**level), 128 // (2**level), 3])
-        for level in range(min_level, max_level)
-    }
-
-    expected = decoders.FPN(
-        input_specs=input_specs,
-        num_filters=num_filters,
-        use_separable_conv=use_separable_conv,
-        use_sync_bn=True)
-
-    cfg = configs.retinanet.RetinaNet()
-    cfg.min_level = min_level
-    cfg.max_level = max_level
-    cfg.num_classes = 10
-    cfg.input_size = [None, None, 3]
-    cfg.decoder = decoders_cfg.Decoder(
-        type='fpn',
-        fpn=decoders_cfg.FPN(
-            num_filters=num_filters, use_separable_conv=use_separable_conv))
-
-    built = factory.build_decoder(input_specs=input_specs, model_config=cfg)
-
-    expected_config = expected.get_config()
-    built_config = built.get_config()
-    self.assertEqual(expected_config, built_config)
-
-  @combinations.generate(
-      combinations.combine(
-          num_filters=[128, 256],
-          num_repeats=[3, 5],
-          use_separable_conv=[True, False]))
-  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
-                                   use_separable_conv):
-    """The factory-built NASFPN has the same config as a hand-built NASFPN."""
-    min_level, max_level = 3, 7
-    # range() stops before max_level, so specs are created for levels 3..6.
-    input_specs = {
-        str(level): tf.TensorShape(
-            [1, 128 // (2**level), 128 // (2**level), 3])
-        for level in range(min_level, max_level)
-    }
-
-    expected = decoders.NASFPN(
-        input_specs=input_specs,
-        num_filters=num_filters,
-        num_repeats=num_repeats,
-        use_separable_conv=use_separable_conv,
-        use_sync_bn=True)
-
-    cfg = configs.retinanet.RetinaNet()
-    cfg.min_level = min_level
-    cfg.max_level = max_level
-    cfg.num_classes = 10
-    cfg.input_size = [None, None, 3]
-    cfg.decoder = decoders_cfg.Decoder(
-        type='nasfpn',
-        nasfpn=decoders_cfg.NASFPN(
-            num_filters=num_filters,
-            num_repeats=num_repeats,
-            use_separable_conv=use_separable_conv))
-
-    built = factory.build_decoder(input_specs=input_specs, model_config=cfg)
-
-    expected_config = expected.get_config()
-    built_config = built.get_config()
-    self.assertEqual(expected_config, built_config)
-
-  @combinations.generate(
-      combinations.combine(
-          level=[3, 4],
-          dilation_rates=[[6, 12, 18], [6, 12]],
-          num_filters=[128, 256]))
-  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
-    """The factory-built ASPP matches a hand-built ASPP except for its name."""
-    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}
-
-    expected = decoders.ASPP(
-        level=level,
-        dilation_rates=dilation_rates,
-        num_filters=num_filters,
-        use_sync_bn=True)
-
-    cfg = configs.semantic_segmentation.SemanticSegmentationModel()
-    cfg.num_classes = 10
-    cfg.input_size = [None, None, 3]
-    cfg.decoder = decoders_cfg.Decoder(
-        type='aspp',
-        aspp=decoders_cfg.ASPP(
-            level=level, dilation_rates=dilation_rates,
-            num_filters=num_filters))
-
-    built = factory.build_decoder(input_specs=input_specs, model_config=cfg)
-
-    expected_config = expected.get_config()
-    built_config = built.get_config()
-    # The aspp layer calls `super().get_config()`, so the two configs agree on
-    # everything except the layer instance name. Force the names to be equal
-    # so the comparison does not raise a false alarm.
-    built_config['name'] = expected_config['name']
-
-    self.assertEqual(expected_config, built_config)
-
-  def test_identity_decoder_creation(self):
-    """The identity decoder is built as None."""
-    cfg = configs.retinanet.RetinaNet()
-    cfg.num_classes = 2
-    cfg.input_size = [None, None, 3]
-
-    cfg.decoder = decoders_cfg.Decoder(
-        type='identity', identity=decoders_cfg.Identity())
-
-    built = factory.build_decoder(input_specs=None, model_config=cfg)
-
-    self.assertIsNone(built)
-
-
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/decoders/fpn.py
+++ b/official/vision/beta/modeling/decoders/fpn.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Contains the definitions of Feature Pyramid Networks (FPN)."""
-from typing import Any, Mapping, Optional
-
-# Import libraries
-from absl import logging
-import tensorflow as tf
-
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.beta.modeling.decoders import factory
-from official.vision.beta.ops import spatial_transform_ops
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class FPN(tf.keras.Model):
-  """Creates a Feature Pyramid Network (FPN).
-
-  This implements the paper:
-  Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and
-  Serge Belongie.
-  Feature Pyramid Networks for Object Detection.
-  (https://arxiv.org/pdf/1612.03144)
-  """
-
-  def __init__(
-      self,
-      input_specs: Mapping[str, tf.TensorShape],
-      min_level: int = 3,
-      max_level: int = 7,
-      num_filters: int = 256,
-      fusion_type: str = 'sum',
-      use_separable_conv: bool = False,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a Feature Pyramid Network (FPN).
-
-    Args:
-      input_specs: A `dict` of input specifications. A dictionary consists of
-        {level: TensorShape} from a backbone. Level keys are strings holding
-        integers (e.g. '3').
-      min_level: An `int` of minimum level in FPN output feature maps.
-      max_level: An `int` of maximum level in FPN output feature maps.
-      num_filters: An `int` number of filters in FPN layers.
-      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
-        concat for feature fusion.
-      use_separable_conv: A `bool`.  If True use separable convolution for
-        convolution in FPN layers.
-      activation: A `str` name of the activation function.
-      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A `str` name of kernel_initializer for convolutional
-        layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      **kwargs: Additional keyword arguments to be passed.
-
-    Raises:
-      ValueError: If the lowest backbone level is above `min_level`, or if
-        `fusion_type` is neither `sum` nor `concat` when features are fused.
-    """
-    # Kept verbatim so that get_config() can return the constructor arguments.
-    self._config_dict = {
-        'input_specs': input_specs,
-        'min_level': min_level,
-        'max_level': max_level,
-        'num_filters': num_filters,
-        'fusion_type': fusion_type,
-        'use_separable_conv': use_separable_conv,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_initializer': kernel_initializer,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-    }
-    if use_separable_conv:
-      conv2d = tf.keras.layers.SeparableConv2D
-    else:
-      conv2d = tf.keras.layers.Conv2D
-    if use_sync_bn:
-      norm = tf.keras.layers.experimental.SyncBatchNormalization
-    else:
-      norm = tf.keras.layers.BatchNormalization
-    activation_fn = tf.keras.layers.Activation(
-        tf_utils.get_activation(activation))
-
-    # Build input feature pyramid.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      bn_axis = -1
-    else:
-      bn_axis = 1
-
-    # Get input feature pyramid from backbone.
-    logging.info('FPN input_specs: %s', input_specs)
-    inputs = self._build_input_pyramid(input_specs, min_level)
-    # Level keys are strings, so take the maximum numerically; a lexicographic
-    # max() would rank '9' above '10'.
-    backbone_max_level = min(max(int(level) for level in inputs), max_level)
-
-    # Build lateral connections.
-    feats_lateral = {}
-    for level in range(min_level, backbone_max_level + 1):
-      feats_lateral[str(level)] = conv2d(
-          filters=num_filters,
-          kernel_size=1,
-          padding='same',
-          kernel_initializer=kernel_initializer,
-          kernel_regularizer=kernel_regularizer,
-          bias_regularizer=bias_regularizer)(
-              inputs[str(level)])
-
-    # Build top-down path, starting from the coarsest backbone level and
-    # fusing each upsampled map with the lateral feature one level below.
-    feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
-    for level in range(backbone_max_level - 1, min_level - 1, -1):
-      feat_a = spatial_transform_ops.nearest_upsampling(
-          feats[str(level + 1)], 2)
-      feat_b = feats_lateral[str(level)]
-
-      if fusion_type == 'sum':
-        feats[str(level)] = feat_a + feat_b
-      elif fusion_type == 'concat':
-        feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
-      else:
-        raise ValueError('Fusion type {} not supported.'.format(fusion_type))
-
-    # TODO(xianzhi): consider to remove bias in conv2d.
-    # Build post-hoc 3x3 convolution kernel.
-    for level in range(min_level, backbone_max_level + 1):
-      feats[str(level)] = conv2d(
-          filters=num_filters,
-          strides=1,
-          kernel_size=3,
-          padding='same',
-          kernel_initializer=kernel_initializer,
-          kernel_regularizer=kernel_regularizer,
-          bias_regularizer=bias_regularizer)(
-              feats[str(level)])
-
-    # TODO(xianzhi): consider to remove bias in conv2d.
-    # Build coarser FPN levels introduced for RetinaNet.
-    for level in range(backbone_max_level + 1, max_level + 1):
-      feats_in = feats[str(level - 1)]
-      # The activation is applied only between consecutive extra levels, not
-      # before the first extra level.
-      if level > backbone_max_level + 1:
-        feats_in = activation_fn(feats_in)
-      feats[str(level)] = conv2d(
-          filters=num_filters,
-          strides=2,
-          kernel_size=3,
-          padding='same',
-          kernel_initializer=kernel_initializer,
-          kernel_regularizer=kernel_regularizer,
-          bias_regularizer=bias_regularizer)(
-              feats_in)
-
-    # Apply batch norm layers.
-    for level in range(min_level, max_level + 1):
-      feats[str(level)] = norm(
-          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
-              feats[str(level)])
-
-    self._output_specs = {
-        str(level): feats[str(level)].get_shape()
-        for level in range(min_level, max_level + 1)
-    }
-
-    super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)
-
-  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
-                           min_level: int):
-    """Creates one `tf.keras.Input` per backbone level.
-
-    Args:
-      input_specs: A `dict` of {level: TensorShape} from a backbone.
-      min_level: An `int` of minimum level in FPN output feature maps.
-
-    Returns:
-      A `dict` of {level: tf.keras.Input}, where each input takes the spec's
-      shape without its first dimension.
-
-    Raises:
-      ValueError: If the lowest backbone level is greater than `min_level`.
-    """
-    assert isinstance(input_specs, dict)
-    # Level keys are strings; compare them as integers so that multi-digit
-    # levels are ordered correctly (as strings, '10' sorts before '3').
-    if min(int(level) for level in input_specs) > min_level:
-      raise ValueError(
-          'Backbone min level should be less or equal to FPN min level')
-
-    inputs = {}
-    for level, spec in input_specs.items():
-      inputs[level] = tf.keras.Input(shape=spec[1:])
-    return inputs
-
-  def get_config(self) -> Mapping[str, Any]:
-    """Returns the constructor arguments recorded in `__init__`."""
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    """Rebuilds an FPN from `get_config()` output; `custom_objects` is unused."""
-    return cls(**config)
-
-  @property
-  def output_specs(self) -> Mapping[str, tf.TensorShape]:
-    """A dict of {level: TensorShape} pairs for the model output."""
-    return self._output_specs
-
-
-@factory.register_decoder_builder('fpn')
-def build_fpn_decoder(
-    input_specs: Mapping[str, tf.TensorShape],
-    model_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
-) -> tf.keras.Model:
-  """Constructs an FPN decoder from the model config.
-
-  Pyramid levels and normalization/activation settings are read from
-  `model_config`; filter count, fusion type and separable-conv choice are read
-  from the selected decoder config.
-
-  Args:
-    input_specs: A `dict` of input specifications. A dictionary consists of
-      {level: TensorShape} from a backbone.
-    model_config: A OneOfConfig. Model config.
-    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
-      None. It is passed to the FPN as its kernel regularizer.
-
-  Returns:
-    A `tf.keras.Model` instance of the FPN decoder.
-
-  Raises:
-    ValueError: If the model_config.decoder.type is not `fpn`.
-  """
-  decoder_type = model_config.decoder.type
-  fpn_cfg = model_config.decoder.get()
-  if decoder_type != 'fpn':
-    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
-                     'Need to be `fpn`.')
-
-  norm_act = model_config.norm_activation
-  fpn_kwargs = dict(
-      input_specs=input_specs,
-      min_level=model_config.min_level,
-      max_level=model_config.max_level,
-      num_filters=fpn_cfg.num_filters,
-      fusion_type=fpn_cfg.fusion_type,
-      use_separable_conv=fpn_cfg.use_separable_conv,
-      activation=norm_act.activation,
-      use_sync_bn=norm_act.use_sync_bn,
-      norm_momentum=norm_act.norm_momentum,
-      norm_epsilon=norm_act.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
-  return FPN(**fpn_kwargs)
--- a/official/vision/beta/modeling/decoders/fpn_test.py
+++ b/official/vision/beta/modeling/decoders/fpn_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for FPN."""
-
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-
-from official.vision.beta.modeling.backbones import mobilenet
-from official.vision.beta.modeling.backbones import resnet
-from official.vision.beta.modeling.decoders import fpn
-
-
-class FPNTest(parameterized.TestCase, tf.test.TestCase):
-  """Tests FPN construction on top of backbones and config round-tripping."""
-
-  def _assert_pyramid_shapes(self, feats, input_size, min_level, max_level):
-    """Checks each level exists with shape [1, size/2^l, size/2^l, 256]."""
-    for level in range(min_level, max_level + 1):
-      key = str(level)
-      self.assertIn(key, feats)
-      side = input_size // 2**level
-      self.assertAllEqual([1, side, side, 256], feats[key].shape.as_list())
-
-  @parameterized.parameters(
-      (256, 3, 7, False, 'sum'),
-      (256, 3, 7, True, 'concat'),
-  )
-  def test_network_creation(self, input_size, min_level, max_level,
-                            use_separable_conv, fusion_type):
-    """FPN on a ResNet-50 backbone yields the expected output shapes."""
-    tf.keras.backend.set_image_data_format('channels_last')
-
-    image = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-
-    backbone = resnet.ResNet(model_id=50)
-    decoder = fpn.FPN(
-        input_specs=backbone.output_specs,
-        min_level=min_level,
-        max_level=max_level,
-        fusion_type=fusion_type,
-        use_separable_conv=use_separable_conv)
-
-    feats = decoder(backbone(image))
-
-    self._assert_pyramid_shapes(feats, input_size, min_level, max_level)
-
-  @parameterized.parameters(
-      (256, 3, 7, False),
-      (256, 3, 7, True),
-  )
-  def test_network_creation_with_mobilenet(self, input_size, min_level,
-                                           max_level, use_separable_conv):
-    """FPN on a MobileNetV2 backbone yields the expected output shapes."""
-    tf.keras.backend.set_image_data_format('channels_last')
-
-    image = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-
-    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
-    decoder = fpn.FPN(
-        input_specs=backbone.output_specs,
-        min_level=min_level,
-        max_level=max_level,
-        use_separable_conv=use_separable_conv)
-
-    feats = decoder(backbone(image))
-
-    self._assert_pyramid_shapes(feats, input_size, min_level, max_level)
-
-  def test_serialize_deserialize(self):
-    """get_config()/from_config() round-trip preserves the config."""
-    # Set every config option explicitly.
-    kwargs = {
-        'input_specs': resnet.ResNet(model_id=50).output_specs,
-        'min_level': 3,
-        'max_level': 7,
-        'num_filters': 256,
-        'fusion_type': 'sum',
-        'use_separable_conv': False,
-        'use_sync_bn': False,
-        'activation': 'relu',
-        'norm_momentum': 0.99,
-        'norm_epsilon': 0.001,
-        'kernel_initializer': 'VarianceScaling',
-        'kernel_regularizer': None,
-        'bias_regularizer': None,
-    }
-    original = fpn.FPN(**kwargs)
-
-    expected_config = dict(kwargs)
-    self.assertEqual(original.get_config(), expected_config)
-
-    # Rebuild a second network from the first network's config.
-    restored = fpn.FPN.from_config(original.get_config())
-
-    # The config must be convertible to JSON.
-    _ = restored.to_json()
-
-    # A successful round trip leaves the config unchanged.
-    self.assertAllEqual(original.get_config(), restored.get_config())
-
-
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/decoders/nasfpn.py
+++ b/official/vision/beta/modeling/decoders/nasfpn.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Contains definitions of NAS-FPN."""
-
-from typing import Any, List, Mapping, Optional, Tuple
-
-# Import libraries
-
-from absl import logging
-import tensorflow as tf
-
-from official.modeling import hyperparams
-from official.modeling import tf_utils
-from official.vision.beta.modeling.decoders import factory
-from official.vision.beta.ops import spatial_transform_ops
-
-
-# The fixed NAS-FPN architecture discovered by NAS.
-# Each element represents a specification of a building block:
-#   (block_level, combine_fn, (input_offset0, input_offset1), is_output).
-# build_block_specs() unpacks each tuple positionally into a BlockSpec, so the
-# field order here must match the parameter order of BlockSpec.__init__.
-NASFPN_BLOCK_SPECS = [
-    (4, 'attention', (1, 3), False),
-    (4, 'sum', (1, 5), False),
-    (3, 'sum', (0, 6), True),
-    (4, 'sum', (6, 7), True),
-    (5, 'attention', (7, 8), True),
-    (7, 'attention', (6, 9), True),
-    (6, 'attention', (9, 10), True),
-]
-
-
-class BlockSpec():
-  """Plain record holding the configuration of one NAS-FPN block.
-
-  The four constructor arguments are stored unchanged as the attributes
-  `level`, `combine_fn`, `input_offsets` and `is_output`.
-  """
-
-  def __init__(self, level: int, combine_fn: str,
-               input_offsets: Tuple[int, int], is_output: bool):
-    (self.level, self.combine_fn, self.input_offsets,
-     self.is_output) = (level, combine_fn, input_offsets, is_output)
-
-
-def build_block_specs(
-    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
-  """Converts block-spec tuples into BlockSpec objects for NAS-FPN.
-
-  Args:
-    block_specs: Tuples whose items are passed positionally to `BlockSpec`.
-      When None or empty, `NASFPN_BLOCK_SPECS` is used instead.
-
-  Returns:
-    A list with one `BlockSpec` per tuple, in the same order.
-  """
-  specs = block_specs or NASFPN_BLOCK_SPECS
-  logging.info('Building NAS-FPN block specs: %s', specs)
-  return [BlockSpec(*fields) for fields in specs]
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class NASFPN(tf.keras.Model):
-  """Creates a NAS-FPN model.
-
-  This implements the paper:
-  Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
-  NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection.
-  (https://arxiv.org/abs/1904.07392)
-  """
-
-  def __init__(
-      self,
-      input_specs: Mapping[str, tf.TensorShape],
-      min_level: int = 3,
-      max_level: int = 7,
-      block_specs: List[BlockSpec] = build_block_specs(),
-      num_filters: int = 256,
-      num_repeats: int = 5,
-      use_separable_conv: bool = False,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_initializer: str = 'VarianceScaling',
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a NAS-FPN model.
-
-    Args:
-      input_specs: A `dict` of input specifications. A dictionary consists of
-        {level: TensorShape} from a backbone.
-      min_level: An `int` of minimum level in FPN output feature maps.
-      max_level: An `int` of maximum level in FPN output feature maps.
-      block_specs: a list of BlockSpec objects that specifies the NAS-FPN
-        network topology. By default, the previously discovered architecture is
-        used.
-      num_filters: An `int` number of filters in FPN layers.
-      num_repeats: number of repeats for feature pyramid network.
-      use_separable_conv: A `bool`.  If True use separable convolution for
-        convolution in FPN layers.
-      activation: A `str` name of the activation function.
-      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_initializer: A `str` name of kernel_initializer for convolutional
-        layers.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    self._config_dict = {
-        'input_specs': input_specs,
-        'min_level': min_level,
-        'max_level': max_level,
-        'num_filters': num_filters,
-        'num_repeats': num_repeats,
-        'use_separable_conv': use_separable_conv,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_initializer': kernel_initializer,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-    }
-    self._min_level = min_level
-    self._max_level = max_level
-    self._block_specs = block_specs
-    self._num_repeats = num_repeats
-    self._conv_op = (tf.keras.layers.SeparableConv2D
-                     if self._config_dict['use_separable_conv']
-                     else tf.keras.layers.Conv2D)
-    if self._config_dict['use_separable_conv']:
-      self._conv_kwargs = {
-          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
-          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      }
-    else:
-      self._conv_kwargs = {
-          'kernel_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      }
-    self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization
-                     if self._config_dict['use_sync_bn']
-                     else tf.keras.layers.BatchNormalization)
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._norm_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-    self._activation = tf_utils.get_activation(activation)
-
-    # Gets input feature pyramid from backbone.
-    inputs = self._build_input_pyramid(input_specs, min_level)
-
-    # Projects the input features.
-    feats = []
-    for level in range(self._min_level, self._max_level + 1):
-      if str(level) in inputs.keys():
-        feats.append(self._resample_feature_map(
-            inputs[str(level)], level, level, self._config_dict['num_filters']))
-      else:
-        feats.append(self._resample_feature_map(
-            feats[-1], level - 1, level, self._config_dict['num_filters']))
-
-    # Repeatedly builds the NAS-FPN modules.
-    for _ in range(self._num_repeats):
-      output_feats = self._build_feature_pyramid(feats)
-      feats = [output_feats[level]
-               for level in range(self._min_level, self._max_level + 1)]
-
-    self._output_specs = {
-        str(level): output_feats[level].get_shape()
-        for level in range(min_level, max_level + 1)
-    }
-    output_feats = {str(level): output_feats[level]
-                    for level in output_feats.keys()}
-    super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs)
-
-  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
-                           min_level: int):
-    """Creates one Keras input placeholder per backbone level.
-
-    Args:
-      input_specs: A `dict` mapping a level (as a decimal string such as '3')
-        to the shape of the backbone feature at that level.
-      min_level: The minimum level of the feature pyramid.
-
-    Returns:
-      A `dict` with the same keys as `input_specs`; each value is a
-      `tf.keras.Input` built from the spec with its first dimension dropped.
-
-    Raises:
-      ValueError: If the lowest backbone level is greater than `min_level`, or
-        if a key of `input_specs` is not an integer string.
-    """
-    assert isinstance(input_specs, dict)
-    # Compare levels numerically. Comparing the raw string keys would order
-    # them lexicographically (e.g. '10' < '3') and let invalid specs through.
-    if min(int(level) for level in input_specs) > min_level:
-      raise ValueError(
-          'Backbone min level should be less or equal to FPN min level')
-
-    return {
-        level: tf.keras.Input(shape=spec[1:])
-        for level, spec in input_specs.items()
-    }
-
-  def _resample_feature_map(self,
-                            inputs,
-                            input_level,
-                            target_level,
-                            target_num_filters=256):
-    """Brings a feature map to the requested channel count and pyramid level.
-
-    Args:
-      inputs: A rank-4 feature tensor whose last dimension is read as the
-        number of filters.
-      input_level: The pyramid level `inputs` currently lives at.
-      target_level: The pyramid level the result should live at.
-      target_num_filters: The number of filters the result should have.
-
-    Returns:
-      The feature after an optional 1x1 projection + normalization, then
-      max-pooled (higher target level), nearest-upsampled (lower target level),
-      or cast to the mixed precision compute dtype (same level).
-    """
-    features = inputs
-    _, _, _, num_channels = features.get_shape().as_list()
-    if num_channels != target_num_filters:
-      # Channel mismatch: project with a 1x1 conv followed by normalization.
-      projection = self._conv_op(
-          filters=target_num_filters,
-          kernel_size=1,
-          padding='same',
-          **self._conv_kwargs)
-      features = projection(features)
-      normalization = self._norm_op(**self._norm_kwargs)
-      features = normalization(features)
-
-    if input_level < target_level:
-      # Going up the pyramid: downsample by a power-of-two stride.
-      stride = int(2 ** (target_level - input_level))
-      pooling = tf.keras.layers.MaxPool2D(
-          pool_size=stride, strides=stride, padding='same')
-      return pooling(features)
-    if input_level > target_level:
-      # Going down the pyramid: upsample by a power-of-two scale.
-      scale = int(2 ** (input_level - target_level))
-      return spatial_transform_ops.nearest_upsampling(features, scale=scale)
-
-    # Same level: make sure the result follows the mixed precision policy. An
-    # input that was not touched above keeps its own dtype (float32 by default)
-    # while processed inputs may use another one, e.g., bfloat16 on TPU.
-    policy_dtype = tf.keras.layers.Layer().dtype_policy.compute_dtype
-    if policy_dtype is None or features.dtype == policy_dtype:
-      return features
-    return tf.cast(features, dtype=policy_dtype)
-
-  def _global_attention(self, feat0, feat1):
-    """Adds `feat1`, gated by a global max-pooled sigmoid of `feat0`, to `feat0`.
-
-    Args:
-      feat0: The feature that produces the gate and receives the gated sum.
-      feat1: The feature that is scaled by the gate before being added.
-
-    Returns:
-      `feat0 + feat1 * sigmoid(max(feat0 over axes 1 and 2))`.
-    """
-    gate = tf.math.sigmoid(
-        tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True))
-    return feat0 + feat1 * gate
-
-  def _build_feature_pyramid(self, feats):
-    """Builds one NAS-FPN cell on top of the given features.
-
-    Every block in `self._block_specs` merges two existing nodes into a new
-    node, which is appended to the working list so later blocks can use it.
-
-    Args:
-      feats: A `list` of feature tensors indexed in parallel with the levels
-        `self._min_level`..`self._max_level`. The list is extended in place
-        with each newly built node.
-
-    Returns:
-      A `dict` mapping integer level to feature tensor, taken from the last
-      `max_level - min_level + 1` nodes of the extended list.
-
-    Raises:
-      ValueError: If a block references an input offset that does not exist
-        yet, or uses an unknown `combine_fn`.
-    """
-    # Per-node count of how many later blocks consumed the node as an input.
-    num_output_connections = [0] * len(feats)
-    num_output_levels = self._max_level - self._min_level + 1
-    # Level of each node in `feats`; grows together with `feats`.
-    feat_levels = list(range(self._min_level, self._max_level + 1))
-
-    # NOTE(review): the enumerate index `i` is not used inside this loop.
-    for i, block_spec in enumerate(self._block_specs):
-      new_level = block_spec.level
-
-      # Checks the range of input_offsets.
-      for input_offset in block_spec.input_offsets:
-        if input_offset >= len(feats):
-          raise ValueError(
-              'input_offset ({}) is larger than num feats({})'.format(
-                  input_offset, len(feats)))
-      input0 = block_spec.input_offsets[0]
-      input1 = block_spec.input_offsets[1]
-
-      # Update graph with inputs.
-      node0 = feats[input0]
-      node0_level = feat_levels[input0]
-      num_output_connections[input0] += 1
-      node0 = self._resample_feature_map(node0, node0_level, new_level)
-      node1 = feats[input1]
-      node1_level = feat_levels[input1]
-      num_output_connections[input1] += 1
-      node1 = self._resample_feature_map(node1, node1_level, new_level)
-
-      # Combine node0 and node1 to create new feat.
-      if block_spec.combine_fn == 'sum':
-        new_node = node0 + node1
-      elif block_spec.combine_fn == 'attention':
-        # The node coming from the higher (or equal) level is passed first,
-        # i.e. it is the one that produces the attention gate.
-        if node0_level >= node1_level:
-          new_node = self._global_attention(node0, node1)
-        else:
-          new_node = self._global_attention(node1, node0)
-      else:
-        raise ValueError('unknown combine_fn `{}`.'
-                         .format(block_spec.combine_fn))
-
-      # Add intermediate nodes that do not have any connections to output.
-      if block_spec.is_output:
-        for j, (feat, feat_level, num_output) in enumerate(
-            zip(feats, feat_levels, num_output_connections)):
-          if num_output == 0 and feat_level == new_level:
-            # Mark the node as consumed so it is not added to a later output.
-            num_output_connections[j] += 1
-
-            feat_ = self._resample_feature_map(feat, feat_level, new_level)
-            new_node += feat_
-
-      # Post-processing of the merged node: activation -> 3x3 conv -> norm.
-      new_node = self._activation(new_node)
-      new_node = self._conv_op(
-          filters=self._config_dict['num_filters'],
-          kernel_size=(3, 3),
-          padding='same',
-          **self._conv_kwargs)(new_node)
-      new_node = self._norm_op(**self._norm_kwargs)(new_node)
-
-      # Register the new node so that subsequent blocks can reference it.
-      feats.append(new_node)
-      feat_levels.append(new_level)
-      num_output_connections.append(0)
-
-    # The last `num_output_levels` nodes are the outputs of this cell.
-    output_feats = {}
-    for i in range(len(feats) - num_output_levels, len(feats)):
-      level = feat_levels[i]
-      output_feats[level] = feats[i]
-    logging.info('Output feature pyramid: %s', output_feats)
-    return output_feats
-
-  def get_config(self) -> Mapping[str, Any]:
-    """Returns the constructor arguments stored for this decoder.
-
-    Returns:
-      A shallow copy of the stored config dict, so that callers modifying the
-      result cannot alter the configuration held by this instance.
-    """
-    return dict(self._config_dict)
-
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    """Creates an instance by passing `config` entries as keyword arguments.
-
-    Args:
-      config: A `dict` of constructor keyword arguments.
-      custom_objects: Unused by this implementation.
-
-    Returns:
-      A new instance of `cls`.
-    """
-    return cls(**config)
-
-  @property
-  def output_specs(self) -> Mapping[str, tf.TensorShape]:
-    """A dict of {level: TensorShape} pairs for the model output.
-
-    Keys are the string levels from `min_level` to `max_level`; values are the
-    shapes of the corresponding output features recorded at construction time.
-    """
-    return self._output_specs
-
-
-@factory.register_decoder_builder('nasfpn')
-def build_nasfpn_decoder(
-    input_specs: Mapping[str, tf.TensorShape],
-    model_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
-) -> tf.keras.Model:
-  """Creates the NAS-FPN decoder described by a model config.
-
-  Args:
-    input_specs: A `dict` of {level: TensorShape} describing the backbone
-      outputs that feed the decoder.
-    model_config: A model config whose `decoder` field is a OneOfConfig and
-      which also provides `min_level`, `max_level` and `norm_activation`.
-    l2_regularizer: An optional `tf.keras.regularizers.Regularizer` used as the
-      kernel regularizer of the decoder. Default to None.
-
-  Returns:
-    A `tf.keras.Model` instance of the NASFPN decoder.
-
-  Raises:
-    ValueError: If the model_config.decoder.type is not `nasfpn`.
-  """
-  selected_type = model_config.decoder.type
-  nasfpn_cfg = model_config.decoder.get()
-  if selected_type != 'nasfpn':
-    raise ValueError(f'Inconsistent decoder type {selected_type}. '
-                     'Need to be `nasfpn`.')
-
-  norm_act_cfg = model_config.norm_activation
-  decoder_kwargs = dict(
-      input_specs=input_specs,
-      min_level=model_config.min_level,
-      max_level=model_config.max_level,
-      num_filters=nasfpn_cfg.num_filters,
-      num_repeats=nasfpn_cfg.num_repeats,
-      use_separable_conv=nasfpn_cfg.use_separable_conv,
-      activation=norm_act_cfg.activation,
-      use_sync_bn=norm_act_cfg.use_sync_bn,
-      norm_momentum=norm_act_cfg.norm_momentum,
-      norm_epsilon=norm_act_cfg.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
-  return NASFPN(**decoder_kwargs)
--- a/official/vision/beta/modeling/decoders/nasfpn_test.py
+++ b/official/vision/beta/modeling/decoders/nasfpn_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for NAS-FPN."""
-
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-
-from official.vision.beta.modeling.backbones import resnet
-from official.vision.beta.modeling.decoders import nasfpn
-
-
-class NASFPNTest(parameterized.TestCase, tf.test.TestCase):
-  """Checks that NAS-FPN can be stacked on top of a ResNet backbone."""
-
-  @parameterized.parameters(
-      (256, 3, 7, False),
-      (256, 3, 7, True),
-  )
-  def test_network_creation(self, input_size, min_level, max_level,
-                            use_separable_conv):
-    """Verifies the levels and shapes of the NAS-FPN outputs."""
-    tf.keras.backend.set_image_data_format('channels_last')
-
-    image = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
-
-    num_filters = 256
-    backbone = resnet.ResNet(model_id=50)
-    decoder = nasfpn.NASFPN(
-        input_specs=backbone.output_specs,
-        min_level=min_level,
-        max_level=max_level,
-        num_filters=num_filters,
-        use_separable_conv=use_separable_conv)
-
-    feats = decoder(backbone(image))
-
-    for level in range(min_level, max_level + 1):
-      key = str(level)
-      # Each level halves the spatial resolution of the previous one.
-      side = input_size // 2**level
-      self.assertIn(key, feats)
-      self.assertAllEqual(
-          [1, side, side, num_filters], feats[key].shape.as_list())
-
-
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/factory.py
+++ b/official/vision/beta/modeling/factory.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Factory methods to build models."""
-
-from typing import Optional
-
-import tensorflow as tf
-
-from official.vision.beta.configs import image_classification as classification_cfg
-from official.vision.beta.configs import maskrcnn as maskrcnn_cfg
-from official.vision.beta.configs import retinanet as retinanet_cfg
-from official.vision.beta.configs import semantic_segmentation as segmentation_cfg
-from official.vision.beta.modeling import backbones
-from official.vision.beta.modeling import classification_model
-from official.vision.beta.modeling import decoders
-from official.vision.beta.modeling import maskrcnn_model
-from official.vision.beta.modeling import retinanet_model
-from official.vision.beta.modeling import segmentation_model
-from official.vision.beta.modeling.heads import dense_prediction_heads
-from official.vision.beta.modeling.heads import instance_heads
-from official.vision.beta.modeling.heads import segmentation_heads
-from official.vision.beta.modeling.layers import detection_generator
-from official.vision.beta.modeling.layers import mask_sampler
-from official.vision.beta.modeling.layers import roi_aligner
-from official.vision.beta.modeling.layers import roi_generator
-from official.vision.beta.modeling.layers import roi_sampler
-
-
-def build_classification_model(
-    input_specs: tf.keras.layers.InputSpec,
-    model_config: classification_cfg.ImageClassificationModel,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-    skip_logits_layer: bool = False,
-    backbone: Optional[tf.keras.Model] = None) -> tf.keras.Model:
-  """Builds the classification model.
-
-  Args:
-    input_specs: The input spec shared by the backbone and the model.
-    model_config: The image classification model config.
-    l2_regularizer: An optional kernel regularizer. Default to None.
-    skip_logits_layer: Forwarded unchanged to `ClassificationModel`.
-    backbone: An optional pre-built backbone; built from `model_config` when
-      it is not provided.
-
-  Returns:
-    A `classification_model.ClassificationModel` instance.
-  """
-  norm_cfg = model_config.norm_activation
-  if not backbone:
-    backbone = backbones.factory.build_backbone(
-        input_specs=input_specs,
-        backbone_config=model_config.backbone,
-        norm_activation_config=norm_cfg,
-        l2_regularizer=l2_regularizer)
-
-  return classification_model.ClassificationModel(
-      backbone=backbone,
-      num_classes=model_config.num_classes,
-      input_specs=input_specs,
-      dropout_rate=model_config.dropout_rate,
-      kernel_initializer=model_config.kernel_initializer,
-      kernel_regularizer=l2_regularizer,
-      add_head_batch_norm=model_config.add_head_batch_norm,
-      use_sync_bn=norm_cfg.use_sync_bn,
-      norm_momentum=norm_cfg.norm_momentum,
-      norm_epsilon=norm_cfg.norm_epsilon,
-      skip_logits_layer=skip_logits_layer)
-
-
-def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
-                   model_config: maskrcnn_cfg.MaskRCNN,
-                   l2_regularizer: Optional[
-                       tf.keras.regularizers.Regularizer] = None,
-                   backbone: Optional[tf.keras.Model] = None,
-                   decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
-  """Builds Mask R-CNN model.
-
-  Args:
-    input_specs: The input spec of the model; its shape without the first
-      dimension is used to create a symbolic input for the backbone.
-    model_config: The Mask R-CNN model config.
-    l2_regularizer: An optional kernel regularizer. Default to None.
-    backbone: An optional pre-built backbone; built from `model_config` when
-      it is not provided.
-    decoder: An optional pre-built decoder; built from `model_config` when it
-      is not provided.
-
-  Returns:
-    A `maskrcnn_model.MaskRCNNModel` instance.
-  """
-  norm_activation_config = model_config.norm_activation
-  if not backbone:
-    backbone = backbones.factory.build_backbone(
-        input_specs=input_specs,
-        backbone_config=model_config.backbone,
-        norm_activation_config=norm_activation_config,
-        l2_regularizer=l2_regularizer)
-  # Symbolic backbone outputs; fed to the decoder and RPN head further below.
-  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))
-
-  if not decoder:
-    decoder = decoders.factory.build_decoder(
-        input_specs=backbone.output_specs,
-        model_config=model_config,
-        l2_regularizer=l2_regularizer)
-
-  rpn_head_config = model_config.rpn_head
-  roi_generator_config = model_config.roi_generator
-  roi_sampler_config = model_config.roi_sampler
-  roi_aligner_config = model_config.roi_aligner
-  detection_head_config = model_config.detection_head
-  generator_config = model_config.detection_generator
-  num_anchors_per_location = (
-      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)
-
-  rpn_head = dense_prediction_heads.RPNHead(
-      min_level=model_config.min_level,
-      max_level=model_config.max_level,
-      num_anchors_per_location=num_anchors_per_location,
-      num_convs=rpn_head_config.num_convs,
-      num_filters=rpn_head_config.num_filters,
-      use_separable_conv=rpn_head_config.use_separable_conv,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
-
-  detection_head = instance_heads.DetectionHead(
-      num_classes=model_config.num_classes,
-      num_convs=detection_head_config.num_convs,
-      num_filters=detection_head_config.num_filters,
-      use_separable_conv=detection_head_config.use_separable_conv,
-      num_fcs=detection_head_config.num_fcs,
-      fc_dims=detection_head_config.fc_dims,
-      class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer,
-      name='detection_head')
-
-  # Calls the decoder and the RPN head once on the symbolic features,
-  # presumably so that their weights get created. Skipped when the decoder is
-  # falsy, in which case the RPN head is not called here either.
-  if decoder:
-    decoder_features = decoder(backbone_features)
-    rpn_head(decoder_features)
-
-  # Cascade mode: one extra detection head per cascade IoU threshold. The heads
-  # are collected in a list that replaces the single `detection_head`.
-  if roi_sampler_config.cascade_iou_thresholds:
-    detection_head_cascade = [detection_head]
-    for cascade_num in range(len(roi_sampler_config.cascade_iou_thresholds)):
-      detection_head = instance_heads.DetectionHead(
-          num_classes=model_config.num_classes,
-          num_convs=detection_head_config.num_convs,
-          num_filters=detection_head_config.num_filters,
-          use_separable_conv=detection_head_config.use_separable_conv,
-          num_fcs=detection_head_config.num_fcs,
-          fc_dims=detection_head_config.fc_dims,
-          class_agnostic_bbox_pred=detection_head_config
-          .class_agnostic_bbox_pred,
-          activation=norm_activation_config.activation,
-          use_sync_bn=norm_activation_config.use_sync_bn,
-          norm_momentum=norm_activation_config.norm_momentum,
-          norm_epsilon=norm_activation_config.norm_epsilon,
-          kernel_regularizer=l2_regularizer,
-          name='detection_head_{}'.format(cascade_num + 1))
-
-      detection_head_cascade.append(detection_head)
-    detection_head = detection_head_cascade
-
-  roi_generator_obj = roi_generator.MultilevelROIGenerator(
-      pre_nms_top_k=roi_generator_config.pre_nms_top_k,
-      pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold,
-      pre_nms_min_size_threshold=(
-          roi_generator_config.pre_nms_min_size_threshold),
-      nms_iou_threshold=roi_generator_config.nms_iou_threshold,
-      num_proposals=roi_generator_config.num_proposals,
-      test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k,
-      test_pre_nms_score_threshold=(
-          roi_generator_config.test_pre_nms_score_threshold),
-      test_pre_nms_min_size_threshold=(
-          roi_generator_config.test_pre_nms_min_size_threshold),
-      test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold,
-      test_num_proposals=roi_generator_config.test_num_proposals,
-      use_batched_nms=roi_generator_config.use_batched_nms)
-
-  # The ROI samplers are always passed to the model as a list; the first entry
-  # is the regular sampler configured from `roi_sampler_config`.
-  roi_sampler_cascade = []
-  roi_sampler_obj = roi_sampler.ROISampler(
-      mix_gt_boxes=roi_sampler_config.mix_gt_boxes,
-      num_sampled_rois=roi_sampler_config.num_sampled_rois,
-      foreground_fraction=roi_sampler_config.foreground_fraction,
-      foreground_iou_threshold=roi_sampler_config.foreground_iou_threshold,
-      background_iou_high_threshold=(
-          roi_sampler_config.background_iou_high_threshold),
-      background_iou_low_threshold=(
-          roi_sampler_config.background_iou_low_threshold))
-  roi_sampler_cascade.append(roi_sampler_obj)
-  # Initialize additional ROI samplers for cascade heads.
-  if roi_sampler_config.cascade_iou_thresholds:
-    for iou in roi_sampler_config.cascade_iou_thresholds:
-      roi_sampler_obj = roi_sampler.ROISampler(
-          mix_gt_boxes=False,
-          num_sampled_rois=roi_sampler_config.num_sampled_rois,
-          foreground_iou_threshold=iou,
-          background_iou_high_threshold=iou,
-          background_iou_low_threshold=0.0,
-          skip_subsampling=True)
-      roi_sampler_cascade.append(roi_sampler_obj)
-
-  roi_aligner_obj = roi_aligner.MultilevelROIAligner(
-      crop_size=roi_aligner_config.crop_size,
-      sample_offset=roi_aligner_config.sample_offset)
-
-  detection_generator_obj = detection_generator.DetectionGenerator(
-      apply_nms=generator_config.apply_nms,
-      pre_nms_top_k=generator_config.pre_nms_top_k,
-      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
-      nms_iou_threshold=generator_config.nms_iou_threshold,
-      max_num_detections=generator_config.max_num_detections,
-      nms_version=generator_config.nms_version,
-      use_cpu_nms=generator_config.use_cpu_nms,
-      soft_nms_sigma=generator_config.soft_nms_sigma)
-
-  # The mask branch (head, sampler and ROI aligner) is optional; all three are
-  # None when `include_mask` is off.
-  if model_config.include_mask:
-    # NOTE(review): unlike the RPN and detection heads above, `use_sync_bn` is
-    # not forwarded to the mask head -- confirm whether this is intentional.
-    mask_head = instance_heads.MaskHead(
-        num_classes=model_config.num_classes,
-        upsample_factor=model_config.mask_head.upsample_factor,
-        num_convs=model_config.mask_head.num_convs,
-        num_filters=model_config.mask_head.num_filters,
-        use_separable_conv=model_config.mask_head.use_separable_conv,
-        activation=model_config.norm_activation.activation,
-        norm_momentum=model_config.norm_activation.norm_momentum,
-        norm_epsilon=model_config.norm_activation.norm_epsilon,
-        kernel_regularizer=l2_regularizer,
-        class_agnostic=model_config.mask_head.class_agnostic)
-
-    # The mask target size is the ROI crop size times the head upsampling.
-    mask_sampler_obj = mask_sampler.MaskSampler(
-        mask_target_size=(
-            model_config.mask_roi_aligner.crop_size *
-            model_config.mask_head.upsample_factor),
-        num_sampled_masks=model_config.mask_sampler.num_sampled_masks)
-
-    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
-        crop_size=model_config.mask_roi_aligner.crop_size,
-        sample_offset=model_config.mask_roi_aligner.sample_offset)
-  else:
-    mask_head = None
-    mask_sampler_obj = None
-    mask_roi_aligner_obj = None
-
-  model = maskrcnn_model.MaskRCNNModel(
-      backbone=backbone,
-      decoder=decoder,
-      rpn_head=rpn_head,
-      detection_head=detection_head,
-      roi_generator=roi_generator_obj,
-      roi_sampler=roi_sampler_cascade,
-      roi_aligner=roi_aligner_obj,
-      detection_generator=detection_generator_obj,
-      mask_head=mask_head,
-      mask_sampler=mask_sampler_obj,
-      mask_roi_aligner=mask_roi_aligner_obj,
-      class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
-      cascade_class_ensemble=detection_head_config.cascade_class_ensemble,
-      min_level=model_config.min_level,
-      max_level=model_config.max_level,
-      num_scales=model_config.anchor.num_scales,
-      aspect_ratios=model_config.anchor.aspect_ratios,
-      anchor_size=model_config.anchor.anchor_size)
-  return model
-
-
-def build_retinanet(
-    input_specs: tf.keras.layers.InputSpec,
-    model_config: retinanet_cfg.RetinaNet,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-    backbone: Optional[tf.keras.Model] = None,
-    decoder: Optional[tf.keras.Model] = None
-) -> tf.keras.Model:
-  """Builds RetinaNet model.
-
-  Args:
-    input_specs: The input spec of the model; its shape without the first
-      dimension is used to create a symbolic input for the backbone.
-    model_config: The RetinaNet model config.
-    l2_regularizer: An optional kernel regularizer. Default to None.
-    backbone: An optional pre-built backbone; built from `model_config` when
-      it is not provided.
-    decoder: An optional pre-built decoder model; built from `model_config`
-      when it is not provided.
-
-  Returns:
-    A `retinanet_model.RetinaNetModel` instance.
-  """
-  norm_activation_config = model_config.norm_activation
-  if not backbone:
-    backbone = backbones.factory.build_backbone(
-        input_specs=input_specs,
-        backbone_config=model_config.backbone,
-        norm_activation_config=norm_activation_config,
-        l2_regularizer=l2_regularizer)
-  # Symbolic backbone outputs; fed to the decoder and head further below.
-  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))
-
-  if not decoder:
-    decoder = decoders.factory.build_decoder(
-        input_specs=backbone.output_specs,
-        model_config=model_config,
-        l2_regularizer=l2_regularizer)
-
-  head_config = model_config.head
-  generator_config = model_config.detection_generator
-  num_anchors_per_location = (
-      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)
-
-  head = dense_prediction_heads.RetinaNetHead(
-      min_level=model_config.min_level,
-      max_level=model_config.max_level,
-      num_classes=model_config.num_classes,
-      num_anchors_per_location=num_anchors_per_location,
-      num_convs=head_config.num_convs,
-      num_filters=head_config.num_filters,
-      attribute_heads=[
-          cfg.as_dict() for cfg in (head_config.attribute_heads or [])
-      ],
-      use_separable_conv=head_config.use_separable_conv,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
-
-  # Builds decoder and head so that their trainable weights are initialized
-  if decoder:
-    decoder_features = decoder(backbone_features)
-    _ = head(decoder_features)
-
-  detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
-      apply_nms=generator_config.apply_nms,
-      pre_nms_top_k=generator_config.pre_nms_top_k,
-      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
-      nms_iou_threshold=generator_config.nms_iou_threshold,
-      max_num_detections=generator_config.max_num_detections,
-      nms_version=generator_config.nms_version,
-      use_cpu_nms=generator_config.use_cpu_nms,
-      soft_nms_sigma=generator_config.soft_nms_sigma)
-
-  model = retinanet_model.RetinaNetModel(
-      backbone,
-      decoder,
-      head,
-      detection_generator_obj,
-      min_level=model_config.min_level,
-      max_level=model_config.max_level,
-      num_scales=model_config.anchor.num_scales,
-      aspect_ratios=model_config.anchor.aspect_ratios,
-      anchor_size=model_config.anchor.anchor_size)
-  return model
-
-
-def build_segmentation_model(
-    input_specs: tf.keras.layers.InputSpec,
-    model_config: segmentation_cfg.SemanticSegmentationModel,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-    backbone: Optional[tf.keras.Model] = None,
-    decoder: Optional[tf.keras.Model] = None
-) -> tf.keras.Model:
-  """Builds Segmentation model.
-
-  Args:
-    input_specs: The input spec used to build the backbone.
-    model_config: The semantic segmentation model config.
-    l2_regularizer: An optional kernel regularizer. Default to None.
-    backbone: An optional pre-built backbone model; built from `model_config`
-      when it is not provided.
-    decoder: An optional pre-built decoder model; built from `model_config`
-      when it is not provided.
-
-  Returns:
-    A `segmentation_model.SegmentationModel` instance.
-  """
-  norm_activation_config = model_config.norm_activation
-  if not backbone:
-    backbone = backbones.factory.build_backbone(
-        input_specs=input_specs,
-        backbone_config=model_config.backbone,
-        norm_activation_config=norm_activation_config,
-        l2_regularizer=l2_regularizer)
-
-  if not decoder:
-    decoder = decoders.factory.build_decoder(
-        input_specs=backbone.output_specs,
-        model_config=model_config,
-        l2_regularizer=l2_regularizer)
-
-  head_config = model_config.head
-
-  head = segmentation_heads.SegmentationHead(
-      num_classes=model_config.num_classes,
-      level=head_config.level,
-      num_convs=head_config.num_convs,
-      prediction_kernel_size=head_config.prediction_kernel_size,
-      num_filters=head_config.num_filters,
-      use_depthwise_convolution=head_config.use_depthwise_convolution,
-      upsample_factor=head_config.upsample_factor,
-      feature_fusion=head_config.feature_fusion,
-      low_level=head_config.low_level,
-      low_level_num_filters=head_config.low_level_num_filters,
-      activation=norm_activation_config.activation,
-      use_sync_bn=norm_activation_config.use_sync_bn,
-      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon,
-      kernel_regularizer=l2_regularizer)
-
-  # The mask scoring head is optional and only built when it is configured.
-  mask_scoring_head = None
-  if model_config.mask_scoring_head:
-    mask_scoring_head = segmentation_heads.MaskScoring(
-        num_classes=model_config.num_classes,
-        **model_config.mask_scoring_head.as_dict(),
-        activation=norm_activation_config.activation,
-        use_sync_bn=norm_activation_config.use_sync_bn,
-        norm_momentum=norm_activation_config.norm_momentum,
-        norm_epsilon=norm_activation_config.norm_epsilon,
-        kernel_regularizer=l2_regularizer)
-
-  model = segmentation_model.SegmentationModel(
-      backbone, decoder, head, mask_scoring_head=mask_scoring_head)
-  return model
--- a/official/vision/beta/modeling/factory_3d.py
+++ b/official/vision/beta/modeling/factory_3d.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Factory methods to build models."""
-
-# Import libraries
-import tensorflow as tf
-
-from official.core import registry
-from official.vision.beta.configs import video_classification as video_classification_cfg
-from official.vision.beta.modeling import video_classification_model
-from official.vision.beta.modeling import backbones
-
-_REGISTERED_MODEL_CLS = {}
-
-
-def register_model_builder(key: str):
-  """Decorates a builder of model class.
-
-  The builder should be a Callable (a class or a function).
-  This decorator supports registration of model builder as follows:
-
-  ```
-  class MyModel(tf.keras.Model):
-    pass
-
-  @register_model_builder('mymodel')
-  def builder(input_specs, config, num_classes, l2_reg):
-    return MyModel(...)
-
-  # Builds a MyModel object.
-  my_model = build_model('mymodel', input_specs, config, num_classes, l2_reg)
-  ```
-
-  Args:
-    key: the key to look up the builder.
-
-  Returns:
-    A callable for use as a decorator that registers the decorated builder
-    under `key` in the module-level model registry.
-  """
-  return registry.register(_REGISTERED_MODEL_CLS, key)
-
-
-def build_model(
-    model_type: str,
-    input_specs: tf.keras.layers.InputSpec,
-    model_config: video_classification_cfg.hyperparams.Config,
-    num_classes: int,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
-  """Builds a model from a config using the builder registered for its type.
-
-  Args:
-    model_type: string name of model type. It should be consistent with
-      ModelConfig.model_type.
-    input_specs: tf.keras.layers.InputSpec.
-    model_config: a OneOfConfig. Model config.
-    num_classes: number of classes.
-    l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to None.
-
-  Returns:
-    tf.keras.Model instance of the model.
-  """
-  # Looks up the builder registered under `model_type` in this module.
-  model_builder = registry.lookup(_REGISTERED_MODEL_CLS, model_type)
-
-  return model_builder(input_specs, model_config, num_classes, l2_regularizer)
-
-
-@register_model_builder('video_classification')
-def build_video_classification_model(
-    input_specs: tf.keras.layers.InputSpec,
-    model_config: video_classification_cfg.VideoClassificationModel,
-    num_classes: int,
-    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
-  """Builds the video classification model.
-
-  Args:
-    input_specs: The input spec used to build the backbone; it is also passed
-      to the model under the 'image' key.
-    model_config: The video classification model config.
-    num_classes: The number of classes.
-    l2_regularizer: An optional kernel regularizer. Default to None.
-
-  Returns:
-    A `video_classification_model.VideoClassificationModel` instance.
-  """
-  backbone = backbones.factory.build_backbone(
-      input_specs=input_specs,
-      backbone_config=model_config.backbone,
-      norm_activation_config=model_config.norm_activation,
-      l2_regularizer=l2_regularizer)
-
-  return video_classification_model.VideoClassificationModel(
-      backbone=backbone,
-      num_classes=num_classes,
-      input_specs={'image': input_specs},
-      dropout_rate=model_config.dropout_rate,
-      aggregate_endpoints=model_config.aggregate_endpoints,
-      kernel_regularizer=l2_regularizer,
-      require_endpoints=model_config.require_endpoints)
--- a/official/vision/beta/modeling/factory_test.py
+++ b/official/vision/beta/modeling/factory_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for factory.py."""
-
-# Import libraries
-from absl.testing import parameterized
-import tensorflow as tf
-
-from official.vision.beta.configs import backbones
-from official.vision.beta.configs import backbones_3d
-from official.vision.beta.configs import image_classification as classification_cfg
-from official.vision.beta.configs import maskrcnn as maskrcnn_cfg
-from official.vision.beta.configs import retinanet as retinanet_cfg
-from official.vision.beta.configs import video_classification as video_classification_cfg
-from official.vision.beta.modeling import factory
-from official.vision.beta.modeling import factory_3d
-
-
-class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):
-  """Smoke tests for `factory.build_classification_model`."""
-
-  @parameterized.parameters(
-      ('resnet', (224, 224), 5e-5),
-      ('resnet', (224, 224), None),
-      ('resnet', (None, None), 5e-5),
-      ('resnet', (None, None), None),
-  )
-  def test_builder(self, backbone_type, input_size, weight_decay):
-    """Builds a classification model; there are no assertions on the result.
-
-    Args:
-      backbone_type: Backbone type string passed to `backbones.Backbone`.
-      input_size: (height, width) used in the input spec; entries may be None.
-      weight_decay: L2 factor; a falsy value means no regularizer is passed.
-    """
-    num_classes = 2
-    input_specs = tf.keras.layers.InputSpec(
-        shape=[None, input_size[0], input_size[1], 3])
-    model_config = classification_cfg.ImageClassificationModel(
-        num_classes=num_classes,
-        backbone=backbones.Backbone(type=backbone_type))
-    l2_regularizer = (
-        tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
-    # The returned model is discarded: the test only checks that building works.
-    _ = factory.build_classification_model(
-        input_specs=input_specs,
-        model_config=model_config,
-        l2_regularizer=l2_regularizer)
-
-
-class MaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase):
-  """Smoke tests for `factory.build_maskrcnn`."""
-
-  @parameterized.parameters(
-      ('resnet', (640, 640)),
-      ('resnet', (None, None)),
-  )
-  def test_builder(self, backbone_type, input_size):
-    """Builds a Mask R-CNN model; there are no assertions on the result.
-
-    Args:
-      backbone_type: Backbone type string passed to `backbones.Backbone`.
-      input_size: (height, width) used in the input spec; entries may be None.
-    """
-    num_classes = 2
-    input_specs = tf.keras.layers.InputSpec(
-        shape=[None, input_size[0], input_size[1], 3])
-    model_config = maskrcnn_cfg.MaskRCNN(
-        num_classes=num_classes,
-        backbone=backbones.Backbone(type=backbone_type))
-    l2_regularizer = tf.keras.regularizers.l2(5e-5)
-    # The returned model is discarded: the test only checks that building works.
-    _ = factory.build_maskrcnn(
-        input_specs=input_specs,
-        model_config=model_config,
-        l2_regularizer=l2_regularizer)
-
-
-class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase):
-  """Tests for `factory.build_retinanet`, with and without attribute heads."""
-
-  @parameterized.parameters(
-      ('resnet', (640, 640), False),
-      ('resnet', (None, None), True),
-  )
-  def test_builder(self, backbone_type, input_size, has_att_heads):
-    """Builds a RetinaNet and checks the attribute head configs afterwards.
-
-    Args:
-      backbone_type: Backbone type string passed to `backbones.Backbone`.
-      input_size: (height, width) used in the input spec; entries may be None.
-      has_att_heads: Whether two attribute heads are added to the head config.
-    """
-    num_classes = 2
-    input_specs = tf.keras.layers.InputSpec(
-        shape=[None, input_size[0], input_size[1], 3])
-    if has_att_heads:
-      # 'att1' only sets a name; the assertions at the end expect its other
-      # fields to read type='regression' and size=1.
-      attribute_heads_config = [
-          retinanet_cfg.AttributeHead(name='att1'),
-          retinanet_cfg.AttributeHead(
-              name='att2', type='classification', size=2),
-      ]
-    else:
-      attribute_heads_config = None
-    model_config = retinanet_cfg.RetinaNet(
-        num_classes=num_classes,
-        backbone=backbones.Backbone(type=backbone_type),
-        head=retinanet_cfg.RetinaNetHead(
-            attribute_heads=attribute_heads_config))
-    l2_regularizer = tf.keras.regularizers.l2(5e-5)
-    _ = factory.build_retinanet(
-        input_specs=input_specs,
-        model_config=model_config,
-        l2_regularizer=l2_regularizer)
-    if has_att_heads:
-      # The attribute head configs must hold these values after the build.
-      self.assertEqual(model_config.head.attribute_heads[0].as_dict(),
-                       dict(name='att1', type='regression', size=1))
-      self.assertEqual(model_config.head.attribute_heads[1].as_dict(),
-                       dict(name='att2', type='classification', size=2))
-
-
-class VideoClassificationModelBuilderTest(parameterized.TestCase,
-                                          tf.test.TestCase):
-  """Smoke tests for `factory_3d.build_video_classification_model`."""
-
-  @parameterized.parameters(
-      ('resnet_3d', (8, 224, 224), 5e-5),
-      ('resnet_3d', (None, None, None), 5e-5),
-  )
-  def test_builder(self, backbone_type, input_size, weight_decay):
-    """Builds a video classification model; the result is not inspected.
-
-    Args:
-      backbone_type: Backbone type string passed to `backbones_3d.Backbone3D`.
-      input_size: Three sizes placed between the batch dimension and the
-        trailing 3 of the input spec shape; entries may be None.
-      weight_decay: L2 factor; a falsy value means no regularizer is passed.
-    """
-    input_specs = tf.keras.layers.InputSpec(
-        shape=[None, input_size[0], input_size[1], input_size[2], 3])
-    model_config = video_classification_cfg.VideoClassificationModel(
-        backbone=backbones_3d.Backbone3D(type=backbone_type))
-    l2_regularizer = (
-        tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
-    # The returned model is discarded: the test only checks that building works.
-    _ = factory_3d.build_video_classification_model(
-        input_specs=input_specs,
-        model_config=model_config,
-        num_classes=2,
-        l2_regularizer=l2_regularizer)
-
-
-# Hand control to the TensorFlow test runner when executed as a script.
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/heads/__init__.py
+++ b/official/vision/beta/modeling/heads/__init__.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Heads package definition."""
-
-from official.vision.beta.modeling.heads.dense_prediction_heads import RetinaNetHead
-from official.vision.beta.modeling.heads.dense_prediction_heads import RPNHead
-from official.vision.beta.modeling.heads.instance_heads import DetectionHead
-from official.vision.beta.modeling.heads.instance_heads import MaskHead
-from official.vision.beta.modeling.heads.segmentation_heads import SegmentationHead
--- a/official/vision/beta/modeling/heads/dense_prediction_heads.py
+++ b/official/vision/beta/modeling/heads/dense_prediction_heads.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Contains definitions of dense prediction heads."""
-
-from typing import Any, Dict, List, Mapping, Optional, Union
-
-# Import libraries
-
-import numpy as np
-import tensorflow as tf
-
-from official.modeling import tf_utils
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class RetinaNetHead(tf.keras.layers.Layer):
-  """Creates a RetinaNet head.
-
-  The head runs a class tower, a box tower and, optionally, one tower per
-  attribute head on every feature level from `min_level` to `max_level`. The
-  convolution layers of a tower are shared by all levels, while every level
-  gets its own normalization layers.
-  """
-
-  def __init__(
-      self,
-      min_level: int,
-      max_level: int,
-      num_classes: int,
-      num_anchors_per_location: int,
-      num_convs: int = 4,
-      num_filters: int = 256,
-      attribute_heads: Optional[List[Dict[str, Any]]] = None,
-      use_separable_conv: bool = False,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      num_params_per_anchor: int = 4,
-      **kwargs):
-    """Initializes a RetinaNet head.
-
-    Args:
-      min_level: An `int` number of minimum feature level.
-      max_level: An `int` number of maximum feature level.
-      num_classes: An `int` number of classes to predict.
-      num_anchors_per_location: An `int` number of anchors per pixel location.
-      num_convs: An `int` number that represents the number of the intermediate
-        conv layers before the prediction.
-      num_filters: An `int` number that represents the number of filters of the
-        intermediate conv layers.
-      attribute_heads: If not None, a list that contains a dict for each
-        additional attribute head. Each dict consists of 3 key-value pairs:
-        `name`, `type` ('regression' or 'classification'), and `size` (number
-        of predicted values for each instance).
-      use_separable_conv: A `bool` that indicates whether separable
-        convolution layers are used.
-      activation: A `str` that indicates which activation is used, e.g. 'relu',
-        'swish', etc.
-      use_sync_bn: A `bool` that indicates whether to use synchronized batch
-        normalization across different replicas.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None. It is only passed to the layers when
-        `use_separable_conv` is False.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      num_params_per_anchor: Number of parameters required to specify an anchor
-        box. For example, `num_params_per_anchor` would be 4 for axis-aligned
-        anchor boxes specified by their y-centers, x-centers, heights, and
-        widths.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(RetinaNetHead, self).__init__(**kwargs)
-    # Every constructor argument is stored here; `build`, `call` and
-    # `get_config` all read from this dict.
-    self._config_dict = {
-        'min_level': min_level,
-        'max_level': max_level,
-        'num_classes': num_classes,
-        'num_anchors_per_location': num_anchors_per_location,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'attribute_heads': attribute_heads,
-        'use_separable_conv': use_separable_conv,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-        'num_params_per_anchor': num_params_per_anchor,
-    }
-
-    # Axis handed to the normalization layers: last axis for 'channels_last',
-    # axis 1 otherwise.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._activation = tf_utils.get_activation(activation)
-
-  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
-    """Creates the sub-layers of the head.
-
-    Args:
-      input_shape: Shape(s) of the inputs. Only forwarded to the parent
-        `build`; the layers created here do not depend on it.
-
-    Raises:
-      ValueError: If an attribute head has a `type` other than 'regression' or
-        'classification'.
-    """
-    conv_op = (tf.keras.layers.SeparableConv2D
-               if self._config_dict['use_separable_conv']
-               else tf.keras.layers.Conv2D)
-    # Arguments shared by all intermediate (tower) convolutions.
-    conv_kwargs = {
-        'filters': self._config_dict['num_filters'],
-        'kernel_size': 3,
-        'padding': 'same',
-        'bias_initializer': tf.zeros_initializer(),
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    }
-    # The kernel_* arguments are only supplied for the regular Conv2D case.
-    if not self._config_dict['use_separable_conv']:
-      conv_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.RandomNormal(
-              stddev=0.01),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-      })
-    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
-             if self._config_dict['use_sync_bn']
-             else tf.keras.layers.BatchNormalization)
-    bn_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-
-    # Class net.
-    # `_cls_convs` holds `num_convs` layers that are reused on every level
-    # (they are created only while visiting `min_level`); `_cls_norms` holds
-    # one list of `num_convs` normalization layers per level.
-    self._cls_convs = []
-    self._cls_norms = []
-    for level in range(
-        self._config_dict['min_level'], self._config_dict['max_level'] + 1):
-      this_level_cls_norms = []
-      for i in range(self._config_dict['num_convs']):
-        if level == self._config_dict['min_level']:
-          cls_conv_name = 'classnet-conv_{}'.format(i)
-          self._cls_convs.append(conv_op(name=cls_conv_name, **conv_kwargs))
-        cls_norm_name = 'classnet-conv-norm_{}_{}'.format(level, i)
-        this_level_cls_norms.append(bn_op(name=cls_norm_name, **bn_kwargs))
-      self._cls_norms.append(this_level_cls_norms)
-
-    # Final class prediction layer. Its bias starts at log(0.01 / 0.99), i.e.
-    # the logit of 0.01.
-    classifier_kwargs = {
-        'filters': (
-            self._config_dict['num_classes'] *
-            self._config_dict['num_anchors_per_location']),
-        'kernel_size': 3,
-        'padding': 'same',
-        'bias_initializer': tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    }
-    if not self._config_dict['use_separable_conv']:
-      classifier_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.RandomNormal(stddev=1e-5),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-      })
-    self._classifier = conv_op(name='scores', **classifier_kwargs)
-
-    # Box net.
-    # Same layout as the class net: shared convs, per-level norms.
-    self._box_convs = []
-    self._box_norms = []
-    for level in range(
-        self._config_dict['min_level'], self._config_dict['max_level'] + 1):
-      this_level_box_norms = []
-      for i in range(self._config_dict['num_convs']):
-        if level == self._config_dict['min_level']:
-          box_conv_name = 'boxnet-conv_{}'.format(i)
-          self._box_convs.append(conv_op(name=box_conv_name, **conv_kwargs))
-        box_norm_name = 'boxnet-conv-norm_{}_{}'.format(level, i)
-        this_level_box_norms.append(bn_op(name=box_norm_name, **bn_kwargs))
-      self._box_norms.append(this_level_box_norms)
-
-    # Final box prediction layer.
-    box_regressor_kwargs = {
-        'filters': (self._config_dict['num_params_per_anchor'] *
-                    self._config_dict['num_anchors_per_location']),
-        'kernel_size': 3,
-        'padding': 'same',
-        'bias_initializer': tf.zeros_initializer(),
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    }
-    if not self._config_dict['use_separable_conv']:
-      box_regressor_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.RandomNormal(
-              stddev=1e-5),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-      })
-    self._box_regressor = conv_op(name='boxes', **box_regressor_kwargs)
-
-    # Attribute learning nets.
-    # The three dicts below are keyed by attribute name and are only created
-    # when at least one attribute head is configured.
-    if self._config_dict['attribute_heads']:
-      self._att_predictors = {}
-      self._att_convs = {}
-      self._att_norms = {}
-
-      for att_config in self._config_dict['attribute_heads']:
-        att_name = att_config['name']
-        att_type = att_config['type']
-        att_size = att_config['size']
-        att_convs_i = []
-        att_norms_i = []
-
-        # Build conv and norm layers.
-        for level in range(self._config_dict['min_level'],
-                           self._config_dict['max_level'] + 1):
-          this_level_att_norms = []
-          for i in range(self._config_dict['num_convs']):
-            if level == self._config_dict['min_level']:
-              att_conv_name = '{}-conv_{}'.format(att_name, i)
-              att_convs_i.append(conv_op(name=att_conv_name, **conv_kwargs))
-            att_norm_name = '{}-conv-norm_{}_{}'.format(att_name, level, i)
-            this_level_att_norms.append(bn_op(name=att_norm_name, **bn_kwargs))
-          att_norms_i.append(this_level_att_norms)
-        self._att_convs[att_name] = att_convs_i
-        self._att_norms[att_name] = att_norms_i
-
-        # Build the final prediction layer.
-        att_predictor_kwargs = {
-            'filters':
-                (att_size * self._config_dict['num_anchors_per_location']),
-            'kernel_size': 3,
-            'padding': 'same',
-            'bias_initializer': tf.zeros_initializer(),
-            'bias_regularizer': self._config_dict['bias_regularizer'],
-        }
-        if att_type == 'regression':
-          # Same initializer as the default set just above; this branch only
-          # makes the regression case explicit.
-          att_predictor_kwargs.update(
-              {'bias_initializer': tf.zeros_initializer()})
-        elif att_type == 'classification':
-          # Same bias value as the class prediction layer: the logit of 0.01.
-          att_predictor_kwargs.update({
-              'bias_initializer':
-                  tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
-          })
-        else:
-          raise ValueError(
-              'Attribute head type {} not supported.'.format(att_type))
-
-        if not self._config_dict['use_separable_conv']:
-          att_predictor_kwargs.update({
-              'kernel_initializer':
-                  tf.keras.initializers.RandomNormal(stddev=1e-5),
-              'kernel_regularizer':
-                  self._config_dict['kernel_regularizer'],
-          })
-
-        self._att_predictors[att_name] = conv_op(
-            name='{}_attributes'.format(att_name), **att_predictor_kwargs)
-
-    super(RetinaNetHead, self).build(input_shape)
-
-  def call(self, features: Mapping[str, tf.Tensor]):
-    """Forward pass of the RetinaNet head.
-
-    Args:
-      features: A `dict` of `tf.Tensor` where
-        - key: A `str` of the level of the multilevel features.
-        - values: A `tf.Tensor`, the feature map tensors, whose shape is
-            [batch, height_l, width_l, channels].
-
-    Returns:
-      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
-        - key: A `str` of the level of the multilevel predictions.
-        - values: A `tf.Tensor` of the box scores predicted from a particular
-            feature level, whose shape is
-            [batch, height_l, width_l, num_classes * num_anchors_per_location].
-      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
-        predictions.
-        - key: A `str` of the level of the multilevel predictions.
-        - values: A `tf.Tensor` of the box predictions from a particular
-            feature level, whose shape is
-            [batch, height_l, width_l,
-             num_params_per_anchor * num_anchors_per_location].
-      attributes: a dict of (attribute_name, attribute_prediction). Each
-        `attribute_prediction` is a dict of:
-        - key: `str`, the level of the multilevel predictions.
-        - values: `Tensor`, the attribute predictions from a particular
-            feature level, whose shape is
-            [batch, height_l, width_l,
-            attribute_size * num_anchors_per_location].
-        Can be an empty dictionary if no attribute learning is required.
-    """
-    scores = {}
-    boxes = {}
-    if self._config_dict['attribute_heads']:
-      attributes = {
-          att_config['name']: {}
-          for att_config in self._config_dict['attribute_heads']
-      }
-    else:
-      attributes = {}
-
-    # `i` indexes the per-level norm lists; `level` is the key into `features`.
-    for i, level in enumerate(
-        range(self._config_dict['min_level'],
-              self._config_dict['max_level'] + 1)):
-      this_level_features = features[str(level)]
-
-      # class net.
-      # Each shared conv is paired with this level's own norm layer.
-      x = this_level_features
-      for conv, norm in zip(self._cls_convs, self._cls_norms[i]):
-        x = conv(x)
-        x = norm(x)
-        x = self._activation(x)
-      scores[str(level)] = self._classifier(x)
-
-      # box net.
-      x = this_level_features
-      for conv, norm in zip(self._box_convs, self._box_norms[i]):
-        x = conv(x)
-        x = norm(x)
-        x = self._activation(x)
-      boxes[str(level)] = self._box_regressor(x)
-
-      # attribute nets.
-      if self._config_dict['attribute_heads']:
-        for att_config in self._config_dict['attribute_heads']:
-          att_name = att_config['name']
-          x = this_level_features
-          for conv, norm in zip(self._att_convs[att_name],
-                                self._att_norms[att_name][i]):
-            x = conv(x)
-            x = norm(x)
-            x = self._activation(x)
-          attributes[att_name][str(level)] = self._att_predictors[att_name](x)
-
-    return scores, boxes, attributes
-
-  def get_config(self):
-    """Returns the dict of constructor arguments (the stored dict, not a copy)."""
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config):
-    """Creates a head by passing the entries of `config` as keyword arguments."""
-    return cls(**config)
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class RPNHead(tf.keras.layers.Layer):
-  """Creates a Region Proposal Network (RPN) head.
-
-  A single tower of convolutions feeds both the score and the box prediction
-  layers. The tower convolutions are shared by all levels from `min_level` to
-  `max_level`, while every level gets its own normalization layers.
-  """
-
-  def __init__(
-      self,
-      min_level: int,
-      max_level: int,
-      num_anchors_per_location: int,
-      num_convs: int = 1,
-      num_filters: int = 256,
-      use_separable_conv: bool = False,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a Region Proposal Network head.
-
-    Args:
-      min_level: An `int` number of minimum feature level.
-      max_level: An `int` number of maximum feature level.
-      num_anchors_per_location: An `int` number of anchors per pixel location.
-      num_convs: An `int` number that represents the number of the intermediate
-        convolution layers before the prediction.
-      num_filters: An `int` number that represents the number of filters of the
-        intermediate convolution layers.
-      use_separable_conv: A `bool` that indicates whether separable
-        convolution layers are used.
-      activation: A `str` that indicates which activation is used, e.g. 'relu',
-        'swish', etc.
-      use_sync_bn: A `bool` that indicates whether to use synchronized batch
-        normalization across different replicas.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None. It is only passed to the layers when
-        `use_separable_conv` is False.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(RPNHead, self).__init__(**kwargs)
-    # Every constructor argument is stored here; `build`, `call` and
-    # `get_config` all read from this dict.
-    self._config_dict = {
-        'min_level': min_level,
-        'max_level': max_level,
-        'num_anchors_per_location': num_anchors_per_location,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'use_separable_conv': use_separable_conv,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-    }
-
-    # Axis handed to the normalization layers: last axis for 'channels_last',
-    # axis 1 otherwise.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._activation = tf_utils.get_activation(activation)
-
-  def build(self, input_shape):
-    """Creates the sub-layers of the head.
-
-    Args:
-      input_shape: Shape(s) of the inputs. Only forwarded to the parent
-        `build`; the layers created here do not depend on it.
-    """
-    conv_op = (tf.keras.layers.SeparableConv2D
-               if self._config_dict['use_separable_conv']
-               else tf.keras.layers.Conv2D)
-    # Arguments shared by all intermediate (tower) convolutions.
-    conv_kwargs = {
-        'filters': self._config_dict['num_filters'],
-        'kernel_size': 3,
-        'padding': 'same',
-        'bias_initializer': tf.zeros_initializer(),
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    }
-    # The kernel_* arguments are only supplied for the regular Conv2D case.
-    if not self._config_dict['use_separable_conv']:
-      conv_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.RandomNormal(
-              stddev=0.01),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-      })
-    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
-             if self._config_dict['use_sync_bn']
-             else tf.keras.layers.BatchNormalization)
-    bn_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-
-    # `_convs` holds `num_convs` layers that are reused on every level (they
-    # are created only while visiting `min_level`); `_norms` holds one list of
-    # `num_convs` normalization layers per level.
-    self._convs = []
-    self._norms = []
-    for level in range(
-        self._config_dict['min_level'], self._config_dict['max_level'] + 1):
-      this_level_norms = []
-      for i in range(self._config_dict['num_convs']):
-        if level == self._config_dict['min_level']:
-          conv_name = 'rpn-conv_{}'.format(i)
-          self._convs.append(conv_op(name=conv_name, **conv_kwargs))
-        norm_name = 'rpn-conv-norm_{}_{}'.format(level, i)
-        this_level_norms.append(bn_op(name=norm_name, **bn_kwargs))
-      self._norms.append(this_level_norms)
-
-    # Score prediction layer: a 1x1 convolution with one output channel per
-    # anchor.
-    classifier_kwargs = {
-        'filters': self._config_dict['num_anchors_per_location'],
-        'kernel_size': 1,
-        'padding': 'valid',
-        'bias_initializer': tf.zeros_initializer(),
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    }
-    if not self._config_dict['use_separable_conv']:
-      classifier_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.RandomNormal(
-              stddev=1e-5),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-      })
-    self._classifier = conv_op(name='rpn-scores', **classifier_kwargs)
-
-    # Box prediction layer: a 1x1 convolution with four output channels per
-    # anchor.
-    box_regressor_kwargs = {
-        'filters': 4 * self._config_dict['num_anchors_per_location'],
-        'kernel_size': 1,
-        'padding': 'valid',
-        'bias_initializer': tf.zeros_initializer(),
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    }
-    if not self._config_dict['use_separable_conv']:
-      box_regressor_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.RandomNormal(
-              stddev=1e-5),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-      })
-    self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs)
-
-    super(RPNHead, self).build(input_shape)
-
-  def call(self, features: Mapping[str, tf.Tensor]):
-    """Forward pass of the RPN head.
-
-    Args:
-      features: A `dict` of `tf.Tensor` where
-        - key: A `str` of the level of the multilevel features.
-        - values: A `tf.Tensor`, the feature map tensors, whose shape is [batch,
-          height_l, width_l, channels].
-
-    Returns:
-      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
-        - key: A `str` of the level of the multilevel predictions.
-        - values: A `tf.Tensor` of the box scores predicted from a particular
-            feature level, whose shape is
-            [batch, height_l, width_l, num_anchors_per_location].
-      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
-        predictions.
-        - key: A `str` of the level of the multilevel predictions.
-        - values: A `tf.Tensor` of the box predictions from a particular
-            feature level, whose shape is
-            [batch, height_l, width_l, 4 * num_anchors_per_location].
-    """
-    scores = {}
-    boxes = {}
-    # `i` indexes the per-level norm lists; `level` is the key into `features`.
-    for i, level in enumerate(
-        range(self._config_dict['min_level'],
-              self._config_dict['max_level'] + 1)):
-      x = features[str(level)]
-      # Each shared conv is paired with this level's own norm layer.
-      for conv, norm in zip(self._convs, self._norms[i]):
-        x = conv(x)
-        x = norm(x)
-        x = self._activation(x)
-      # Both predictions are computed from the same tower output.
-      scores[str(level)] = self._classifier(x)
-      boxes[str(level)] = self._box_regressor(x)
-    return scores, boxes
-
-  def get_config(self):
-    """Returns the dict of constructor arguments (the stored dict, not a copy)."""
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config):
-    """Creates a head by passing the entries of `config` as keyword arguments."""
-    return cls(**config)
--- a/official/vision/beta/modeling/heads/dense_prediction_heads_test.py
+++ b/official/vision/beta/modeling/heads/dense_prediction_heads_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for dense_prediction_heads.py."""
-
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-
-from official.vision.beta.modeling.heads import dense_prediction_heads
-
-
-class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Tests for `dense_prediction_heads.RetinaNetHead`."""
-
-  @parameterized.parameters(
-      (False, False, False),
-      (False, True, False),
-      (True, False, True),
-      (True, True, True),
-  )
-  def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads):
-    """Checks the output shapes of a forward pass on two feature levels.
-
-    Args:
-      use_separable_conv: Forwarded to the head constructor.
-      use_sync_bn: Forwarded to the head constructor.
-      has_att_heads: Whether a single 'depth' regression attribute head of
-        size 1 is configured.
-    """
-    if has_att_heads:
-      attribute_heads = [dict(name='depth', type='regression', size=1)]
-    else:
-      attribute_heads = None
-
-    retinanet_head = dense_prediction_heads.RetinaNetHead(
-        min_level=3,
-        max_level=4,
-        num_classes=3,
-        num_anchors_per_location=3,
-        num_convs=2,
-        num_filters=256,
-        attribute_heads=attribute_heads,
-        use_separable_conv=use_separable_conv,
-        activation='relu',
-        use_sync_bn=use_sync_bn,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    features = {
-        '3': np.random.rand(2, 128, 128, 16),
-        '4': np.random.rand(2, 64, 64, 16),
-    }
-    scores, boxes, attributes = retinanet_head(features)
-    # Scores: num_classes (3) * num_anchors_per_location (3) = 9 channels.
-    self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 9])
-    self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 9])
-    # Boxes: default num_params_per_anchor (4) * 3 anchors = 12 channels.
-    self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12])
-    self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12])
-    if has_att_heads:
-      # Attributes: size (1) * 3 anchors = 3 channels.
-      for att in attributes.values():
-        self.assertAllEqual(att['3'].numpy().shape, [2, 128, 128, 3])
-        self.assertAllEqual(att['4'].numpy().shape, [2, 64, 64, 3])
-
-  def test_serialize_deserialize(self):
-    """Checks that `get_config` survives a `from_config` round trip."""
-    retinanet_head = dense_prediction_heads.RetinaNetHead(
-        min_level=3,
-        max_level=7,
-        num_classes=3,
-        num_anchors_per_location=9,
-        num_convs=2,
-        num_filters=16,
-        attribute_heads=None,
-        use_separable_conv=False,
-        activation='relu',
-        use_sync_bn=False,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    config = retinanet_head.get_config()
-    new_retinanet_head = (
-        dense_prediction_heads.RetinaNetHead.from_config(config))
-    self.assertAllEqual(
-        retinanet_head.get_config(), new_retinanet_head.get_config())
-
-
-class RpnHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Tests for `dense_prediction_heads.RPNHead`."""
-
-  @parameterized.parameters(
-      (False, False),
-      (False, True),
-      (True, False),
-      (True, True),
-  )
-  def test_forward(self, use_separable_conv, use_sync_bn):
-    """Checks the output shapes of a forward pass on two feature levels.
-
-    Args:
-      use_separable_conv: Forwarded to the head constructor.
-      use_sync_bn: Forwarded to the head constructor.
-    """
-    rpn_head = dense_prediction_heads.RPNHead(
-        min_level=3,
-        max_level=4,
-        num_anchors_per_location=3,
-        num_convs=2,
-        num_filters=256,
-        use_separable_conv=use_separable_conv,
-        activation='relu',
-        use_sync_bn=use_sync_bn,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    features = {
-        '3': np.random.rand(2, 128, 128, 16),
-        '4': np.random.rand(2, 64, 64, 16),
-    }
-    scores, boxes = rpn_head(features)
-    # Scores: one channel per anchor (3).
-    self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 3])
-    self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 3])
-    # Boxes: 4 channels per anchor, 4 * 3 = 12.
-    self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12])
-    self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12])
-
-  def test_serialize_deserialize(self):
-    """Checks that `get_config` survives a `from_config` round trip."""
-    rpn_head = dense_prediction_heads.RPNHead(
-        min_level=3,
-        max_level=7,
-        num_anchors_per_location=9,
-        num_convs=2,
-        num_filters=16,
-        use_separable_conv=False,
-        activation='relu',
-        use_sync_bn=False,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    config = rpn_head.get_config()
-    new_rpn_head = dense_prediction_heads.RPNHead.from_config(config)
-    self.assertAllEqual(rpn_head.get_config(), new_rpn_head.get_config())
-
-
-# Hand control to the TensorFlow test runner when executed as a script.
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/heads/instance_heads.py
+++ b/official/vision/beta/modeling/heads/instance_heads.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Contains definitions of instance prediction heads."""
-
-from typing import List, Union, Optional
-# Import libraries
-import tensorflow as tf
-
-from official.modeling import tf_utils
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class DetectionHead(tf.keras.layers.Layer):
-  """Creates a detection head.
-
-  The head runs per-ROI features through `num_convs` conv + batch-norm blocks,
-  flattens them, applies `num_fcs` dense + batch-norm blocks, and finally
-  predicts class scores and box outputs with two separate dense layers.
-  """
-
-  def __init__(
-      self,
-      num_classes: int,
-      num_convs: int = 0,
-      num_filters: int = 256,
-      use_separable_conv: bool = False,
-      num_fcs: int = 2,
-      fc_dims: int = 1024,
-      class_agnostic_bbox_pred: bool = False,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a detection head.
-
-    Args:
-      num_classes: An `int` for the number of classes.
-      num_convs: An `int` number that represents the number of the intermediate
-        convolution layers before the FC layers.
-      num_filters: An `int` number that represents the number of filters of the
-        intermediate convolution layers.
-      use_separable_conv: A `bool` that indicates whether the separable
-        convolution layers is used.
-      num_fcs: An `int` number that represents the number of FC layers before
-        the predictions.
-      fc_dims: An `int` number that represents the number of dimension of the FC
-        layers.
-      class_agnostic_bbox_pred: `bool`, indicating whether bboxes should be
-        predicted for every class or not.
-      activation: A `str` that indicates which activation is used, e.g. 'relu',
-        'swish', etc.
-      use_sync_bn: A `bool` that indicates whether to use synchronized batch
-        normalization across different replicas.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(DetectionHead, self).__init__(**kwargs)
-    # Keep every constructor argument so that `get_config` can return them and
-    # `build` can read them back later.
-    self._config_dict = {
-        'num_classes': num_classes,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'use_separable_conv': use_separable_conv,
-        'num_fcs': num_fcs,
-        'fc_dims': fc_dims,
-        'class_agnostic_bbox_pred': class_agnostic_bbox_pred,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-    }
-
-    # Batch norm normalizes over the channel axis, whose position depends on
-    # the global Keras image data format.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._activation = tf_utils.get_activation(activation)
-
-  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
-    """Creates the sub-layers of the head.
-
-    Args:
-      input_shape: The input shape; it is only forwarded to the parent class
-        `build`.
-    """
-    # Select the conv layer class and the keyword arguments that match it:
-    # SeparableConv2D takes depthwise/pointwise initializers and regularizers
-    # where Conv2D takes a single kernel initializer and regularizer.
-    conv_op = (tf.keras.layers.SeparableConv2D
-               if self._config_dict['use_separable_conv']
-               else tf.keras.layers.Conv2D)
-    conv_kwargs = {
-        'filters': self._config_dict['num_filters'],
-        'kernel_size': 3,
-        'padding': 'same',
-    }
-    if self._config_dict['use_separable_conv']:
-      conv_kwargs.update({
-          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
-          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      })
-    else:
-      conv_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      })
-    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
-             if self._config_dict['use_sync_bn']
-             else tf.keras.layers.BatchNormalization)
-    bn_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-
-    # One conv layer and one batch-norm layer per intermediate conv block.
-    self._convs = []
-    self._conv_norms = []
-    for i in range(self._config_dict['num_convs']):
-      conv_name = 'detection-conv_{}'.format(i)
-      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
-      bn_name = 'detection-conv-bn_{}'.format(i)
-      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))
-
-    # One dense layer and one batch-norm layer per FC block.
-    self._fcs = []
-    self._fc_norms = []
-    for i in range(self._config_dict['num_fcs']):
-      fc_name = 'detection-fc_{}'.format(i)
-      self._fcs.append(
-          tf.keras.layers.Dense(
-              units=self._config_dict['fc_dims'],
-              kernel_initializer=tf.keras.initializers.VarianceScaling(
-                  scale=1 / 3.0, mode='fan_out', distribution='uniform'),
-              kernel_regularizer=self._config_dict['kernel_regularizer'],
-              bias_regularizer=self._config_dict['bias_regularizer'],
-              name=fc_name))
-      bn_name = 'detection-fc-bn_{}'.format(i)
-      self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs))
-
-    # Class prediction layer: one output unit per class.
-    self._classifier = tf.keras.layers.Dense(
-        units=self._config_dict['num_classes'],
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
-        bias_initializer=tf.zeros_initializer(),
-        kernel_regularizer=self._config_dict['kernel_regularizer'],
-        bias_regularizer=self._config_dict['bias_regularizer'],
-        name='detection-scores')
-
-    # Box prediction layer: 4 outputs in total when class agnostic, otherwise
-    # 4 outputs per class.
-    num_box_outputs = (4 if self._config_dict['class_agnostic_bbox_pred'] else
-                       self._config_dict['num_classes'] * 4)
-    self._box_regressor = tf.keras.layers.Dense(
-        units=num_box_outputs,
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
-        bias_initializer=tf.zeros_initializer(),
-        kernel_regularizer=self._config_dict['kernel_regularizer'],
-        bias_regularizer=self._config_dict['bias_regularizer'],
-        name='detection-boxes')
-
-    super(DetectionHead, self).build(input_shape)
-
-  def call(self, inputs: tf.Tensor, training: bool = None):
-    """Forward pass of box and class branches for the Mask-RCNN model.
-
-    Args:
-      inputs: A `tf.Tensor` of the shape [batch_size, num_instances, roi_height,
-        roi_width, roi_channels], representing the ROI features.
-      training: a `bool` indicating whether it is in `training` mode. It is not
-        referenced in this method body.
-
-    Returns:
-      class_outputs: A `tf.Tensor` of the shape
-        [batch_size, num_rois, num_classes], representing the class predictions.
-      box_outputs: A `tf.Tensor` of the shape
-        [batch_size, num_rois, num_classes * 4], or [batch_size, num_rois, 4]
-        when `class_agnostic_bbox_pred` is set, representing the box
-        predictions.
-    """
-    roi_features = inputs
-    # The static shape values are used directly in the reshapes below, so every
-    # dimension except the batch size is expected to be statically known.
-    _, num_rois, height, width, filters = roi_features.get_shape().as_list()
-
-    # Fold the ROI axis into the batch axis so the 2-D convs get 4-D input.
-    x = tf.reshape(roi_features, [-1, height, width, filters])
-    for conv, bn in zip(self._convs, self._conv_norms):
-      x = conv(x)
-      x = bn(x)
-      x = self._activation(x)
-
-    # Re-read the channel count (the convs may have changed it), then restore
-    # the ROI axis and flatten each ROI feature map into a single vector.
-    _, _, _, filters = x.get_shape().as_list()
-    x = tf.reshape(x, [-1, num_rois, height * width * filters])
-
-    for fc, bn in zip(self._fcs, self._fc_norms):
-      x = fc(x)
-      x = bn(x)
-      x = self._activation(x)
-
-    # Both prediction layers read the same shared FC features.
-    classes = self._classifier(x)
-    boxes = self._box_regressor(x)
-    return classes, boxes
-
-  def get_config(self):
-    """Returns the constructor-argument dict stored in `__init__`.
-
-    The internal dict object itself is returned, not a copy.
-    """
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config):
-    """Builds a new instance by passing `config` as keyword arguments."""
-    return cls(**config)
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class MaskHead(tf.keras.layers.Layer):
-  """Creates a mask head.
-
-  The head runs per-ROI features through `num_convs` conv + batch-norm blocks,
-  upsamples them with a transposed convolution, predicts mask logits with a
-  1x1 convolution, and returns for each ROI the logit map selected by the ROI
-  class (or the single shared map when `class_agnostic` is set).
-  """
-
-  def __init__(
-      self,
-      num_classes: int,
-      upsample_factor: int = 2,
-      num_convs: int = 4,
-      num_filters: int = 256,
-      use_separable_conv: bool = False,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      class_agnostic: bool = False,
-      **kwargs):
-    """Initializes a mask head.
-
-    Args:
-      num_classes: An `int` of the number of classes.
-      upsample_factor: An `int` that indicates the upsample factor to generate
-        the final predicted masks. It should be >= 1.
-      num_convs: An `int` number that represents the number of the intermediate
-        convolution layers before the mask prediction layers.
-      num_filters: An `int` number that represents the number of filters of the
-        intermediate convolution layers.
-      use_separable_conv: A `bool` that indicates whether the separable
-        convolution layers is used.
-      activation: A `str` that indicates which activation is used, e.g. 'relu',
-        'swish', etc.
-      use_sync_bn: A `bool` that indicates whether to use synchronized batch
-        normalization across different replicas.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      class_agnostic: A `bool`. If set, we use a single channel mask head that
-        is shared between all classes.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(MaskHead, self).__init__(**kwargs)
-    # Keep every constructor argument so that `get_config` can return them and
-    # `build`/`call` can read them back later.
-    self._config_dict = {
-        'num_classes': num_classes,
-        'upsample_factor': upsample_factor,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'use_separable_conv': use_separable_conv,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-        'class_agnostic': class_agnostic
-    }
-
-    # Batch norm normalizes over the channel axis, whose position depends on
-    # the global Keras image data format.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._activation = tf_utils.get_activation(activation)
-
-  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
-    """Creates the sub-layers of the head.
-
-    Args:
-      input_shape: The input shape; it is only forwarded to the parent class
-        `build`.
-    """
-    # Select the conv layer class and the keyword arguments that match it:
-    # SeparableConv2D takes depthwise/pointwise initializers and regularizers
-    # where Conv2D takes a single kernel initializer and regularizer.
-    conv_op = (tf.keras.layers.SeparableConv2D
-               if self._config_dict['use_separable_conv']
-               else tf.keras.layers.Conv2D)
-    conv_kwargs = {
-        'filters': self._config_dict['num_filters'],
-        'kernel_size': 3,
-        'padding': 'same',
-    }
-    if self._config_dict['use_separable_conv']:
-      conv_kwargs.update({
-          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
-          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      })
-    else:
-      conv_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      })
-    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
-             if self._config_dict['use_sync_bn']
-             else tf.keras.layers.BatchNormalization)
-    bn_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-
-    # One conv layer and one batch-norm layer per intermediate conv block.
-    self._convs = []
-    self._conv_norms = []
-    for i in range(self._config_dict['num_convs']):
-      conv_name = 'mask-conv_{}'.format(i)
-      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
-      bn_name = 'mask-conv-bn_{}'.format(i)
-      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))
-
-    # Transposed conv whose kernel size and stride both equal the upsample
-    # factor; `call` relies on it scaling height and width by that factor.
-    self._deconv = tf.keras.layers.Conv2DTranspose(
-        filters=self._config_dict['num_filters'],
-        kernel_size=self._config_dict['upsample_factor'],
-        strides=self._config_dict['upsample_factor'],
-        padding='valid',
-        kernel_initializer=tf.keras.initializers.VarianceScaling(
-            scale=2, mode='fan_out', distribution='untruncated_normal'),
-        bias_initializer=tf.zeros_initializer(),
-        kernel_regularizer=self._config_dict['kernel_regularizer'],
-        bias_regularizer=self._config_dict['bias_regularizer'],
-        name='mask-upsampling')
-    self._deconv_bn = bn_op(name='mask-deconv-bn', **bn_kwargs)
-
-    # The logits layer emits one channel per class, or a single shared channel
-    # when the head is class agnostic.
-    if self._config_dict['class_agnostic']:
-      num_filters = 1
-    else:
-      num_filters = self._config_dict['num_classes']
-
-    # `conv_kwargs` is rebuilt from scratch here for the 1x1 logits layer; the
-    # intermediate conv layers above were already created with the old values.
-    conv_kwargs = {
-        'filters': num_filters,
-        'kernel_size': 1,
-        'padding': 'valid',
-    }
-    if self._config_dict['use_separable_conv']:
-      conv_kwargs.update({
-          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
-          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      })
-    else:
-      conv_kwargs.update({
-          'kernel_initializer': tf.keras.initializers.VarianceScaling(
-              scale=2, mode='fan_out', distribution='untruncated_normal'),
-          'bias_initializer': tf.zeros_initializer(),
-          'kernel_regularizer': self._config_dict['kernel_regularizer'],
-          'bias_regularizer': self._config_dict['bias_regularizer'],
-      })
-    self._mask_regressor = conv_op(name='mask-logits', **conv_kwargs)
-
-    super(MaskHead, self).build(input_shape)
-
-  def call(self, inputs: List[tf.Tensor], training: bool = None):
-    """Forward pass of mask branch for the Mask-RCNN model.
-
-    Args:
-      inputs: A `list` of two tensors where
-        inputs[0]: A `tf.Tensor` of shape [batch_size, num_instances,
-          roi_height, roi_width, roi_channels], representing the ROI features.
-        inputs[1]: A `tf.Tensor` of shape [batch_size, num_instances],
-          representing the classes of the ROIs.
-      training: A `bool` indicating whether it is in `training` mode. It is not
-        referenced in this method body.
-
-    Returns:
-      mask_outputs: A `tf.Tensor` of shape
-        [batch_size, num_instances, roi_height * upsample_factor,
-         roi_width * upsample_factor], representing the mask predictions.
-    """
-    roi_features, roi_classes = inputs
-    batch_size, num_rois, height, width, filters = (
-        roi_features.get_shape().as_list())
-    # Fall back to the dynamic batch size when it is not statically known. The
-    # other dimensions are used as static values below.
-    if batch_size is None:
-      batch_size = tf.shape(roi_features)[0]
-
-    # Fold the ROI axis into the batch axis so the 2-D convs get 4-D input.
-    x = tf.reshape(roi_features, [-1, height, width, filters])
-    for conv, bn in zip(self._convs, self._conv_norms):
-      x = conv(x)
-      x = bn(x)
-      x = self._activation(x)
-
-    x = self._deconv(x)
-    x = self._deconv_bn(x)
-    x = self._activation(x)
-
-    logits = self._mask_regressor(x)
-
-    mask_height = height * self._config_dict['upsample_factor']
-    mask_width = width * self._config_dict['upsample_factor']
-
-    # Restore the ROI axis; the last axis holds one logit map per class, or a
-    # single map when class agnostic.
-    if self._config_dict['class_agnostic']:
-      logits = tf.reshape(logits, [-1, num_rois, mask_height, mask_width, 1])
-    else:
-      logits = tf.reshape(
-          logits,
-          [-1, num_rois, mask_height, mask_width,
-           self._config_dict['num_classes']])
-
-    # Build [batch_size, num_rois] grids holding the batch index and the ROI
-    # index of every ROI.
-    batch_indices = tf.tile(
-        tf.expand_dims(tf.range(batch_size), axis=1), [1, num_rois])
-    mask_indices = tf.tile(
-        tf.expand_dims(tf.range(num_rois), axis=0), [batch_size, 1])
-
-    # Class agnostic heads only have channel 0; otherwise the ROI class picks
-    # the channel.
-    if self._config_dict['class_agnostic']:
-      class_gather_indices = tf.zeros_like(roi_classes, dtype=tf.int32)
-    else:
-      class_gather_indices = tf.cast(roi_classes, dtype=tf.int32)
-
-    # Each index triple is (batch index, ROI index, class index). The logits are
-    # transposed so the class axis comes third, hence each triple selects one
-    # [mask_height, mask_width] map.
-    gather_indices = tf.stack(
-        [batch_indices, mask_indices, class_gather_indices],
-        axis=2)
-    mask_outputs = tf.gather_nd(
-        tf.transpose(logits, [0, 1, 4, 2, 3]), gather_indices)
-    return mask_outputs
-
-  def get_config(self):
-    """Returns the constructor-argument dict stored in `__init__`.
-
-    The internal dict object itself is returned, not a copy.
-    """
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config):
-    """Builds a new instance by passing `config` as keyword arguments."""
-    return cls(**config)
--- a/official/vision/beta/modeling/heads/instance_heads_test.py
+++ b/official/vision/beta/modeling/heads/instance_heads_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for instance_heads.py."""
-
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-
-from official.vision.beta.modeling.heads import instance_heads
-
-
-class DetectionHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Tests output shapes and config round-tripping of `DetectionHead`."""
-
-  # Each tuple is (num_convs, num_fcs, use_separable_conv, use_sync_bn).
-  @parameterized.parameters(
-      (0, 0, False, False),
-      (0, 1, False, False),
-      (1, 0, False, False),
-      (1, 1, False, False),
-  )
-  def test_forward(self, num_convs, num_fcs, use_separable_conv, use_sync_bn):
-    """Checks the score and box output shapes of a forward pass."""
-    detection_head = instance_heads.DetectionHead(
-        num_classes=3,
-        num_convs=num_convs,
-        num_filters=16,
-        use_separable_conv=use_separable_conv,
-        num_fcs=num_fcs,
-        fc_dims=4,
-        activation='relu',
-        use_sync_bn=use_sync_bn,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    # Input is [batch=2, rois=10, height=128, width=128, channels=16].
-    roi_features = np.random.rand(2, 10, 128, 128, 16)
-    scores, boxes = detection_head(roi_features)
-    # 3 scores per ROI (num_classes) and 12 box values per ROI (num_classes * 4).
-    self.assertAllEqual(scores.numpy().shape, [2, 10, 3])
-    self.assertAllEqual(boxes.numpy().shape, [2, 10, 12])
-
-  def test_serialize_deserialize(self):
-    """Checks that a head rebuilt via `from_config` reports the same config."""
-    detection_head = instance_heads.DetectionHead(
-        num_classes=91,
-        num_convs=0,
-        num_filters=256,
-        use_separable_conv=False,
-        num_fcs=2,
-        fc_dims=1024,
-        activation='relu',
-        use_sync_bn=False,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    # Round trip: config -> new instance -> config. The two configs must match.
-    config = detection_head.get_config()
-    new_detection_head = instance_heads.DetectionHead.from_config(config)
-    self.assertAllEqual(
-        detection_head.get_config(), new_detection_head.get_config())
-
-
-class MaskHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Tests output shapes and config round-tripping of `MaskHead`."""
-
-  # Each tuple is (upsample_factor, num_convs, use_sync_bn).
-  @parameterized.parameters(
-      (1, 1, False),
-      (1, 2, False),
-      (2, 1, False),
-      (2, 2, False),
-  )
-  def test_forward(self, upsample_factor, num_convs, use_sync_bn):
-    """Checks that the mask output is upsampled by `upsample_factor`."""
-    mask_head = instance_heads.MaskHead(
-        num_classes=3,
-        upsample_factor=upsample_factor,
-        num_convs=num_convs,
-        num_filters=16,
-        use_separable_conv=False,
-        activation='relu',
-        use_sync_bn=use_sync_bn,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    # Input is [batch=2, rois=10, height=14, width=14, channels=16]; every ROI
-    # is assigned class 0.
-    roi_features = np.random.rand(2, 10, 14, 14, 16)
-    roi_classes = np.zeros((2, 10))
-    masks = mask_head([roi_features, roi_classes])
-    self.assertAllEqual(
-        masks.numpy().shape,
-        [2, 10, 14 * upsample_factor, 14 * upsample_factor])
-
-  def test_serialize_deserialize(self):
-    """Checks that a head rebuilt via `from_config` reports the same config."""
-    mask_head = instance_heads.MaskHead(
-        num_classes=3,
-        upsample_factor=2,
-        num_convs=1,
-        num_filters=256,
-        use_separable_conv=False,
-        activation='relu',
-        use_sync_bn=False,
-        norm_momentum=0.99,
-        norm_epsilon=0.001,
-        kernel_regularizer=None,
-        bias_regularizer=None,
-    )
-    # Round trip: config -> new instance -> config. The two configs must match.
-    config = mask_head.get_config()
-    new_mask_head = instance_heads.MaskHead.from_config(config)
-    self.assertAllEqual(
-        mask_head.get_config(), new_mask_head.get_config())
-
-  def test_forward_class_agnostic(self):
-    """Checks the output shape of a class agnostic head with default settings."""
-    mask_head = instance_heads.MaskHead(
-        num_classes=3,
-        class_agnostic=True
-    )
-    roi_features = np.random.rand(2, 10, 14, 14, 16)
-    roi_classes = np.zeros((2, 10))
-    masks = mask_head([roi_features, roi_classes])
-    # 28 = 14 * 2, i.e. the input size times the head's default upsample factor.
-    self.assertAllEqual(masks.numpy().shape, [2, 10, 28, 28])
-
-
-# Runs the test cases of this module when the file is executed as a script.
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/heads/segmentation_heads.py
+++ b/official/vision/beta/modeling/heads/segmentation_heads.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Contains definitions of segmentation heads."""
-from typing import List, Union, Optional, Mapping, Tuple, Any
-import tensorflow as tf
-
-from official.modeling import tf_utils
-from official.vision.beta.modeling.layers import nn_layers
-from official.vision.beta.ops import spatial_transform_ops
-
-
-class MaskScoring(tf.keras.Model):
-  """Creates a mask scoring layer.
-
-  This implements mask scoring layer from the paper:
-
-  Zhaojin Huang, Lichao Huang, Yongchao Gong, Chang Huang, Xinggang Wang.
-  Mask Scoring R-CNN.
-  (https://arxiv.org/pdf/1903.00241.pdf)
-  """
-
-  def __init__(
-      self,
-      num_classes: int,
-      fc_input_size: List[int],
-      num_convs: int = 3,
-      num_filters: int = 256,
-      fc_dims: int = 1024,
-      num_fcs: int = 2,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-
-    """Initializes mask scoring layer.
-
-    Args:
-      num_classes: An `int` for number of classes.
-      fc_input_size: A List of `int` for the input size of the
-        fully connected layers. It is passed as the target `size` of the
-        resize applied before the fully connected layers.
-      num_convs: An `int` for number of conv layers.
-      num_filters: An `int` for the number of filters for conv layers.
-      fc_dims: An `int` number of filters for each fully connected layers.
-      num_fcs: An `int` for number of fully connected layers.
-      activation: A `str` name of the activation function.
-      use_sync_bn: A bool, whether or not to use sync batch normalization.
-      norm_momentum: A float for the momentum in BatchNorm. Defaults to 0.99.
-      norm_epsilon: A float for the epsilon value in BatchNorm. Defaults to
-        0.001.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(MaskScoring, self).__init__(**kwargs)
-
-    # Keep every constructor argument so that `get_config` can return them and
-    # `build`/`call` can read them back later.
-    self._config_dict = {
-        'num_classes': num_classes,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'fc_input_size': fc_input_size,
-        'fc_dims': fc_dims,
-        'num_fcs': num_fcs,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'activation': activation,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer,
-    }
-
-    # Batch norm normalizes over the channel axis, whose position depends on
-    # the global Keras image data format.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._activation = tf_utils.get_activation(activation)
-
-  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
-    """Creates the sub-layers of the mask scoring head.
-
-    Args:
-      input_shape: The input shape; it is only forwarded to the parent class
-        `build`.
-    """
-    conv_op = tf.keras.layers.Conv2D
-    conv_kwargs = {
-        'filters': self._config_dict['num_filters'],
-        'kernel_size': 3,
-        'padding': 'same',
-    }
-    conv_kwargs.update({
-        'kernel_initializer': tf.keras.initializers.VarianceScaling(
-            scale=2, mode='fan_out', distribution='untruncated_normal'),
-        'bias_initializer': tf.zeros_initializer(),
-        'kernel_regularizer': self._config_dict['kernel_regularizer'],
-        'bias_regularizer': self._config_dict['bias_regularizer'],
-    })
-    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
-             if self._config_dict['use_sync_bn']
-             else tf.keras.layers.BatchNormalization)
-    bn_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-
-    # One conv layer and one batch-norm layer per conv block.
-    self._convs = []
-    self._conv_norms = []
-    for i in range(self._config_dict['num_convs']):
-      conv_name = 'mask-scoring_{}'.format(i)
-      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
-      bn_name = 'mask-scoring-bn_{}'.format(i)
-      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))
-
-    # One dense layer and one batch-norm layer per FC block.
-    self._fcs = []
-    self._fc_norms = []
-    for i in range(self._config_dict['num_fcs']):
-      fc_name = 'mask-scoring-fc_{}'.format(i)
-      self._fcs.append(
-          tf.keras.layers.Dense(
-              units=self._config_dict['fc_dims'],
-              kernel_initializer=tf.keras.initializers.VarianceScaling(
-                  scale=1 / 3.0, mode='fan_out', distribution='uniform'),
-              kernel_regularizer=self._config_dict['kernel_regularizer'],
-              bias_regularizer=self._config_dict['bias_regularizer'],
-              name=fc_name))
-      bn_name = 'mask-scoring-fc-bn_{}'.format(i)
-      self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs))
-
-    # Final prediction layer: one score per class.
-    self._classifier = tf.keras.layers.Dense(
-        units=self._config_dict['num_classes'],
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
-        bias_initializer=tf.zeros_initializer(),
-        kernel_regularizer=self._config_dict['kernel_regularizer'],
-        bias_regularizer=self._config_dict['bias_regularizer'],
-        name='iou-scores')
-
-    super(MaskScoring, self).build(input_shape)
-
-  def call(self, inputs: tf.Tensor, training: bool = None):
-    """Forward pass mask scoring head.
-
-    Args:
-      inputs: A `tf.Tensor` of the shape [batch_size, height, width,
-        num_classes], representing the segmentation logits.
-      training: a `bool` indicating whether it is in `training` mode. It is not
-        referenced in this method body.
-
-    Returns:
-      mask_scores: A `tf.Tensor` of predicted mask scores
-        [batch_size, num_classes].
-    """
-    # Block gradients so that this head does not back-propagate into whatever
-    # produced the input logits.
-    x = tf.stop_gradient(inputs)
-    for conv, bn in zip(self._convs, self._conv_norms):
-      x = conv(x)
-      x = bn(x)
-      x = self._activation(x)
-
-    # Casts feat to float32 so the resize op can be run on TPU.
-    x = tf.cast(x, tf.float32)
-    x = tf.image.resize(x, size=self._config_dict['fc_input_size'],
-                        method=tf.image.ResizeMethod.BILINEAR)
-    # Casts it back to be compatible with the rest of the operations.
-    x = tf.cast(x, inputs.dtype)
-
-    # Flatten each resized feature map into a single vector for the FC stack.
-    _, h, w, filters = x.get_shape().as_list()
-    x = tf.reshape(x, [-1, h * w * filters])
-
-    for fc, bn in zip(self._fcs, self._fc_norms):
-      x = fc(x)
-      x = bn(x)
-      x = self._activation(x)
-
-    ious = self._classifier(x)
-    return ious
-
-  def get_config(self) -> Mapping[str, Any]:
-    """Returns the constructor-argument dict stored in `__init__`.
-
-    The internal dict object itself is returned, not a copy.
-    """
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config, custom_objects=None):
-    """Builds a new instance by passing `config` as keyword arguments.
-
-    Args:
-      config: A mapping of constructor keyword arguments.
-      custom_objects: Accepted but not used by this method.
-
-    Returns:
-      A new `MaskScoring` instance.
-    """
-    return cls(**config)
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class SegmentationHead(tf.keras.layers.Layer):
-  """Creates a segmentation head.
-
-  The head optionally fuses features (`deeplabv3plus`, `pyramid_fusion` or
-  `panoptic_fpn_fusion`), applies a stack of convolution + normalization +
-  activation layers, optionally upsamples the result, and produces per-class
-  scores with a final convolution.
-  """
-
-  def __init__(
-      self,
-      num_classes: int,
-      level: Union[int, str],
-      num_convs: int = 2,
-      num_filters: int = 256,
-      use_depthwise_convolution: bool = False,
-      prediction_kernel_size: int = 1,
-      upsample_factor: int = 1,
-      feature_fusion: Optional[str] = None,
-      decoder_min_level: Optional[int] = None,
-      decoder_max_level: Optional[int] = None,
-      low_level: int = 2,
-      low_level_num_filters: int = 48,
-      num_decoder_filters: int = 256,
-      activation: str = 'relu',
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      **kwargs):
-    """Initializes a segmentation head.
-
-    Args:
-      num_classes: An `int` number of mask classification categories. The number
-        of classes does not include background class.
-      level: An `int` or `str`, level to use to build segmentation head.
-      num_convs: An `int` number of stacked convolution before the last
-        prediction layer.
-      num_filters: An `int` number to specify the number of filters used.
-        Default is 256.
-      use_depthwise_convolution: A bool to specify if use depthwise separable
-        convolutions.
-      prediction_kernel_size: An `int` number to specify the kernel size of the
-        prediction layer.
-      upsample_factor: An `int` number to specify the upsampling factor to
-        generate finer mask. Default 1 means no upsampling is applied.
-      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
-        `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
-        decoder_features[level] will be fused with low level feature maps from
-        backbone. If `pyramid_fusion`, multiscale features will be resized and
-        fused at the target level. If `panoptic_fpn_fusion`, the decoder
-        features are passed through a `nn_layers.PanopticFPNFusion` layer
-        configured with `decoder_min_level`, `decoder_max_level` and `level`.
-      decoder_min_level: An `int` of minimum level from decoder to use in
-        feature fusion. It is only used when feature_fusion is set to
-        `panoptic_fpn_fusion`.
-      decoder_max_level: An `int` of maximum level from decoder to use in
-        feature fusion. It is only used when feature_fusion is set to
-        `panoptic_fpn_fusion`.
-      low_level: An `int` of backbone level to be used for feature fusion. It is
-        used when feature_fusion is set to `deeplabv3plus`.
-      low_level_num_filters: An `int` of reduced number of filters for the low
-        level features before fusing it with higher level features. It is only
-        used when feature_fusion is set to `deeplabv3plus`.
-      num_decoder_filters: An `int` of number of filters in the decoder outputs.
-        It is only used when feature_fusion is set to `panoptic_fpn_fusion`.
-      activation: A `str` that indicates which activation is used, e.g. 'relu',
-        'swish', etc.
-      use_sync_bn: A `bool` that indicates whether to use synchronized batch
-        normalization across different replicas.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
-      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
-        Conv2D. Default is None.
-      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
-      **kwargs: Additional keyword arguments to be passed.
-    """
-    super(SegmentationHead, self).__init__(**kwargs)
-
-    # Constructor arguments are kept verbatim; `build`, `call` and
-    # `get_config` all read their settings from this dictionary.
-    self._config_dict = {
-        'num_classes': num_classes,
-        'level': level,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'use_depthwise_convolution': use_depthwise_convolution,
-        'prediction_kernel_size': prediction_kernel_size,
-        'upsample_factor': upsample_factor,
-        'feature_fusion': feature_fusion,
-        'decoder_min_level': decoder_min_level,
-        'decoder_max_level': decoder_max_level,
-        'low_level': low_level,
-        'low_level_num_filters': low_level_num_filters,
-        'num_decoder_filters': num_decoder_filters,
-        'activation': activation,
-        'use_sync_bn': use_sync_bn,
-        'norm_momentum': norm_momentum,
-        'norm_epsilon': norm_epsilon,
-        'kernel_regularizer': kernel_regularizer,
-        'bias_regularizer': bias_regularizer
-    }
-    # The channel axis depends on the globally configured image data format.
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      self._bn_axis = -1
-    else:
-      self._bn_axis = 1
-    self._activation = tf_utils.get_activation(activation)
-
-  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
-    """Creates the variables of the segmentation head."""
-    use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
-    random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
-    conv_op = tf.keras.layers.Conv2D
-    # In depthwise mode the 3x3 spatial filtering is done by a separate
-    # DepthwiseConv2D layer, so the regular convolution uses a 1x1 kernel.
-    conv_kwargs = {
-        'kernel_size': 3 if not use_depthwise_convolution else 1,
-        'padding': 'same',
-        'use_bias': False,
-        'kernel_initializer': random_initializer,
-        'kernel_regularizer': self._config_dict['kernel_regularizer'],
-    }
-    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
-             if self._config_dict['use_sync_bn']
-             else tf.keras.layers.BatchNormalization)
-    bn_kwargs = {
-        'axis': self._bn_axis,
-        'momentum': self._config_dict['norm_momentum'],
-        'epsilon': self._config_dict['norm_epsilon'],
-    }
-
-    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
-      # Deeplabv3+ feature fusion layers.
-      # A 1x1 convolution that maps the low-level backbone feature to
-      # `low_level_num_filters` channels, followed by normalization.
-      self._dlv3p_conv = conv_op(
-          kernel_size=1,
-          padding='same',
-          use_bias=False,
-          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
-          kernel_regularizer=self._config_dict['kernel_regularizer'],
-          name='segmentation_head_deeplabv3p_fusion_conv',
-          filters=self._config_dict['low_level_num_filters'])
-
-      self._dlv3p_norm = bn_op(
-          name='segmentation_head_deeplabv3p_fusion_norm', **bn_kwargs)
-
-    elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
-      self._panoptic_fpn_fusion = nn_layers.PanopticFPNFusion(
-          min_level=self._config_dict['decoder_min_level'],
-          max_level=self._config_dict['decoder_max_level'],
-          target_level=self._config_dict['level'],
-          num_filters=self._config_dict['num_filters'],
-          num_fpn_filters=self._config_dict['num_decoder_filters'],
-          activation=self._config_dict['activation'],
-          kernel_regularizer=self._config_dict['kernel_regularizer'],
-          bias_regularizer=self._config_dict['bias_regularizer'])
-
-    # Segmentation head layers.
-    # `_convs` and `_norms` are kept the same length: every convolution
-    # (depthwise or regular) gets its own normalization layer, because `call`
-    # iterates over them pairwise.
-    self._convs = []
-    self._norms = []
-    for i in range(self._config_dict['num_convs']):
-      if use_depthwise_convolution:
-        self._convs.append(
-            tf.keras.layers.DepthwiseConv2D(
-                name='segmentation_head_depthwise_conv_{}'.format(i),
-                kernel_size=3,
-                padding='same',
-                use_bias=False,
-                depthwise_initializer=random_initializer,
-                depthwise_regularizer=self._config_dict['kernel_regularizer'],
-                depth_multiplier=1))
-        norm_name = 'segmentation_head_depthwise_norm_{}'.format(i)
-        self._norms.append(bn_op(name=norm_name, **bn_kwargs))
-      conv_name = 'segmentation_head_conv_{}'.format(i)
-      self._convs.append(
-          conv_op(
-              name=conv_name,
-              filters=self._config_dict['num_filters'],
-              **conv_kwargs))
-      norm_name = 'segmentation_head_norm_{}'.format(i)
-      self._norms.append(bn_op(name=norm_name, **bn_kwargs))
-
-    # Final prediction layer: one output channel per class, with bias.
-    self._classifier = conv_op(
-        name='segmentation_output',
-        filters=self._config_dict['num_classes'],
-        kernel_size=self._config_dict['prediction_kernel_size'],
-        padding='same',
-        bias_initializer=tf.zeros_initializer(),
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
-        kernel_regularizer=self._config_dict['kernel_regularizer'],
-        bias_regularizer=self._config_dict['bias_regularizer'])
-
-    super().build(input_shape)
-
-  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
-                               Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
-    """Forward pass of the segmentation head.
-
-    It supports both a tuple of 2 tensors or 2 dictionaries. The first is
-    backbone endpoints, and the second is decoder endpoints. When inputs are
-    tensors, they are from a single level of feature maps. When inputs are
-    dictionaries, they contain multiple levels of feature maps, where the key
-    is the index of feature map.
-
-    Args:
-      inputs: A tuple of 2 feature map tensors of shape
-        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
-        - key: A `str` of the level of the multilevel features.
-        - values: A `tf.Tensor` of the feature map tensors, whose shape is
-            [batch, height_l, width_l, channels].
-        The first is backbone endpoints, and the second is decoder endpoints.
-
-    Returns:
-      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
-        scores predicted from input features.
-
-    Raises:
-      ValueError: If `feature_fusion` is `pyramid_fusion` and the decoder
-        endpoints are not given as a dictionary.
-    """
-
-    backbone_output = inputs[0]
-    decoder_output = inputs[1]
-    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
-      # deeplabv3+ feature fusion
-      # Dictionary inputs are indexed with the level converted to a string.
-      x = decoder_output[str(self._config_dict['level'])] if isinstance(
-          decoder_output, dict) else decoder_output
-      y = backbone_output[str(self._config_dict['low_level'])] if isinstance(
-          backbone_output, dict) else backbone_output
-      y = self._dlv3p_norm(self._dlv3p_conv(y))
-      y = self._activation(y)
-
-      # Resize the decoder feature to the spatial size of the low-level
-      # feature, match its dtype, then concatenate along the channel axis.
-      x = tf.image.resize(
-          x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
-      x = tf.cast(x, dtype=y.dtype)
-      x = tf.concat([x, y], axis=self._bn_axis)
-    elif self._config_dict['feature_fusion'] == 'pyramid_fusion':
-      if not isinstance(decoder_output, dict):
-        raise ValueError('Only support dictionary decoder_output.')
-      x = nn_layers.pyramid_feature_fusion(decoder_output,
-                                           self._config_dict['level'])
-    elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
-      x = self._panoptic_fpn_fusion(decoder_output)
-    else:
-      # No fusion: use the decoder feature at the configured level directly.
-      x = decoder_output[str(self._config_dict['level'])] if isinstance(
-          decoder_output, dict) else decoder_output
-
-    for conv, norm in zip(self._convs, self._norms):
-      x = conv(x)
-      x = norm(x)
-      x = self._activation(x)
-    # Upsampling happens before the classifier, so the prediction layer runs
-    # on the upsampled feature map.
-    if self._config_dict['upsample_factor'] > 1:
-      x = spatial_transform_ops.nearest_upsampling(
-          x, scale=self._config_dict['upsample_factor'])
-
-    return self._classifier(x)
-
-  def get_config(self):
-    """Returns the base layer config merged with the constructor arguments.
-
-    The regularizer entries hold whatever objects were passed to the
-    constructor; they are not serialized here.
-    """
-    base_config = super().get_config()
-    return dict(list(base_config.items()) + list(self._config_dict.items()))
-
-  @classmethod
-  def from_config(cls, config):
-    """Builds a `SegmentationHead` by passing `config` as keyword arguments."""
-    return cls(**config)
--- a/official/vision/beta/modeling/heads/segmentation_heads_test.py
+++ b/official/vision/beta/modeling/heads/segmentation_heads_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for segmentation_heads.py."""
-
-# Import libraries
-from absl.testing import parameterized
-import numpy as np
-import tensorflow as tf
-
-from official.vision.beta.modeling.heads import segmentation_heads
-
-
-class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Forward-pass and serialization tests for `SegmentationHead`."""
-
-  @parameterized.parameters(
-      (2, 'pyramid_fusion', None, None),
-      (3, 'pyramid_fusion', None, None),
-      (2, 'panoptic_fpn_fusion', 2, 5),
-      (2, 'panoptic_fpn_fusion', 2, 6),
-      (3, 'panoptic_fpn_fusion', 3, 5),
-      (3, 'panoptic_fpn_fusion', 3, 6))
-  def test_forward(self, level, feature_fusion,
-                   decoder_min_level, decoder_max_level):
-    """Checks the logits shape against the decoder feature at `level`.
-
-    Args:
-      level: An `int` target level of the segmentation head.
-      feature_fusion: A `str` name of the feature fusion mode under test.
-      decoder_min_level: An `int` minimum decoder level, or None.
-      decoder_max_level: An `int` maximum decoder level, or None.
-    """
-    backbone_features = {
-        '3': np.random.rand(2, 128, 128, 16),
-        '4': np.random.rand(2, 64, 64, 16),
-        '5': np.random.rand(2, 32, 32, 16),
-    }
-    decoder_features = {
-        '3': np.random.rand(2, 128, 128, 64),
-        '4': np.random.rand(2, 64, 64, 64),
-        '5': np.random.rand(2, 32, 32, 64),
-        '6': np.random.rand(2, 16, 16, 64),
-    }
-
-    # Level 2 features are only provided for the panoptic FPN fusion cases.
-    if feature_fusion == 'panoptic_fpn_fusion':
-      backbone_features['2'] = np.random.rand(2, 256, 256, 16)
-      decoder_features['2'] = np.random.rand(2, 256, 256, 64)
-
-    head = segmentation_heads.SegmentationHead(
-        num_classes=10,
-        level=level,
-        feature_fusion=feature_fusion,
-        decoder_min_level=decoder_min_level,
-        decoder_max_level=decoder_max_level,
-        num_decoder_filters=64)
-
-    logits = head((backbone_features, decoder_features))
-
-    # The feature dictionaries are keyed by the level as a string, so the
-    # membership test must use `str(level)`; an `int` key never matches and
-    # would silently skip the shape assertion.
-    level_key = str(level)
-    if level_key in decoder_features:
-      self.assertAllEqual(logits.numpy().shape, [
-          2, decoder_features[level_key].shape[1],
-          decoder_features[level_key].shape[2], 10
-      ])
-
-  def test_serialize_deserialize(self):
-    """Checks that a head rebuilt from its config reports the same config."""
-    head = segmentation_heads.SegmentationHead(num_classes=10, level=3)
-    config = head.get_config()
-    new_head = segmentation_heads.SegmentationHead.from_config(config)
-    self.assertAllEqual(head.get_config(), new_head.get_config())
-
-
-class MaskScoringHeadTest(parameterized.TestCase, tf.test.TestCase):
-  """Output-shape and config round-trip checks for `MaskScoring`."""
-
-  @parameterized.parameters(
-      (1, 1, 64, [4, 4]),
-      (2, 1, 64, [4, 4]),
-      (3, 1, 64, [4, 4]),
-      (1, 2, 32, [8, 8]),
-      (2, 2, 32, [8, 8]),
-      (3, 2, 32, [8, 8]),)
-  def test_forward(self, num_convs, num_fcs,
-                   num_filters, fc_input_size):
-    """Runs the head on random features and checks the score shape."""
-    # NOTE(review): `num_fcs` is parameterized but never forwarded to the
-    # head below -- confirm whether that is intended.
-    random_features = np.random.rand(2, 64, 64, 16)
-
-    head_kwargs = {
-        'num_classes': 2,
-        'num_convs': num_convs,
-        'num_filters': num_filters,
-        'fc_dims': 128,
-        'fc_input_size': fc_input_size,
-    }
-    mask_scoring = segmentation_heads.MaskScoring(**head_kwargs)
-
-    predicted_scores = mask_scoring(random_features)
-    # One row per batch element (2) and one column per class (2).
-    self.assertAllEqual(predicted_scores.numpy().shape, [2, 2])
-
-  def test_serialize_deserialize(self):
-    """Rebuilds the head from its config and compares both configs."""
-    original = segmentation_heads.MaskScoring(
-        num_classes=2, fc_input_size=[4, 4], fc_dims=128)
-    restored = segmentation_heads.MaskScoring.from_config(
-        original.get_config())
-    self.assertAllEqual(original.get_config(), restored.get_config())
-
-# Run the test cases in this module when it is executed as a script.
-if __name__ == '__main__':
-  tf.test.main()
--- a/official/vision/beta/modeling/layers/__init__.py
+++ b/official/vision/beta/modeling/layers/__init__.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Layers package definition."""
-
-from official.vision.beta.modeling.layers.box_sampler import BoxSampler
-from official.vision.beta.modeling.layers.detection_generator import DetectionGenerator
-from official.vision.beta.modeling.layers.detection_generator import MultilevelDetectionGenerator
-from official.vision.beta.modeling.layers.mask_sampler import MaskSampler
-from official.vision.beta.modeling.layers.nn_blocks import BottleneckBlock
-from official.vision.beta.modeling.layers.nn_blocks import BottleneckResidualInner
-from official.vision.beta.modeling.layers.nn_blocks import DepthwiseSeparableConvBlock
-from official.vision.beta.modeling.layers.nn_blocks import InvertedBottleneckBlock
-from official.vision.beta.modeling.layers.nn_blocks import ResidualBlock
-from official.vision.beta.modeling.layers.nn_blocks import ResidualInner
-from official.vision.beta.modeling.layers.nn_blocks import ReversibleLayer
-from official.vision.beta.modeling.layers.nn_blocks_3d import BottleneckBlock3D
-from official.vision.beta.modeling.layers.nn_blocks_3d import SelfGating
-from official.vision.beta.modeling.layers.nn_layers import CausalConvMixin
-from official.vision.beta.modeling.layers.nn_layers import Conv2D
-from official.vision.beta.modeling.layers.nn_layers import Conv3D
-from official.vision.beta.modeling.layers.nn_layers import DepthwiseConv2D
-from official.vision.beta.modeling.layers.nn_layers import GlobalAveragePool3D
-from official.vision.beta.modeling.layers.nn_layers import PositionalEncoding
-from official.vision.beta.modeling.layers.nn_layers import Scale
-from official.vision.beta.modeling.layers.nn_layers import SpatialAveragePool3D
-from official.vision.beta.modeling.layers.nn_layers import SqueezeExcitation
-from official.vision.beta.modeling.layers.nn_layers import StochasticDepth
-from official.vision.beta.modeling.layers.nn_layers import TemporalSoftmaxPool
-from official.vision.beta.modeling.layers.roi_aligner import MultilevelROIAligner
-from official.vision.beta.modeling.layers.roi_generator import MultilevelROIGenerator
-from official.vision.beta.modeling.layers.roi_sampler import ROISampler
--- a/official/vision/beta/modeling/layers/box_sampler.py
+++ b/official/vision/beta/modeling/layers/box_sampler.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Contains definitions of box sampler."""
-
-# Import libraries
-import tensorflow as tf
-
-from official.vision.beta.ops import sampling_ops
-
-
-@tf.keras.utils.register_keras_serializable(package='Beta')
-class BoxSampler(tf.keras.layers.Layer):
-  """Layer that draws a fixed-size set of positive and negative boxes."""
-
-  def __init__(self,
-               num_samples: int = 512,
-               foreground_fraction: float = 0.25,
-               **kwargs):
-    """Stores the sampling settings and initializes the base layer.
-
-    Args:
-      num_samples: An `int`, how many boxes are sampled for each image.
-      foreground_fraction: A `float` in [0, 1], the share of sampled boxes
-        that should come from positive examples.
-      **kwargs: Extra keyword arguments forwarded to the `Layer` constructor.
-    """
-    self._config_dict = dict(
-        num_samples=num_samples,
-        foreground_fraction=foreground_fraction)
-    super().__init__(**kwargs)
-
-  def call(self, positive_matches: tf.Tensor, negative_matches: tf.Tensor,
-           ignored_matches: tf.Tensor):
-    """Picks the indices of the sampled instances for every image.
-
-    Args:
-      positive_matches: A `bool` tensor of shape [batch, N], N being the
-        number of instances; `True` marks a positive example.
-      negative_matches: A `bool` tensor of shape [batch, N]; `True` marks a
-        negative example.
-      ignored_matches: A `bool` tensor of shape [batch, N]; `True` marks an
-        instance that must be ignored.
-
-    Returns:
-      A `tf.Tensor` of shape [batch_size, K] holding the indices of the
-        sampled examples, where K is `num_samples`.
-    """
-    num_samples = self._config_dict['num_samples']
-
-    # An instance is a candidate when it is positive or negative and is not
-    # marked as ignored.
-    is_matched = tf.logical_or(positive_matches, negative_matches)
-    is_kept = tf.logical_not(ignored_matches)
-    candidates = tf.logical_and(is_matched, is_kept)
-
-    balanced_sampler = sampling_ops.BalancedPositiveNegativeSampler(
-        positive_fraction=self._config_dict['foreground_fraction'],
-        is_static=True)
-
-    # The sampler is applied image by image, so the batch dimension has to
-    # be statically known.
-    per_image_indicators = [
-        balanced_sampler.subsample(
-            candidates[image_index], num_samples,
-            positive_matches[image_index])
-        for image_index in range(candidates.shape[0])
-    ]
-    indicators = tf.cast(tf.stack(per_image_indicators), dtype=tf.int32)
-
-    # Top-k over the integer-cast indicators yields K indices per image.
-    top_k_result = tf.nn.top_k(indicators, k=num_samples, sorted=True)
-    return top_k_result[1]
-
-  def get_config(self):
-    """Returns the dictionary of sampling settings stored at construction."""
-    return self._config_dict
-
-  @classmethod
-  def from_config(cls, config):
-    """Creates a `BoxSampler` from a dictionary of constructor arguments."""
-    return cls(**config)
--- a/official/vision/beta/modeling/layers/deeplab.py
+++ b/official/vision/beta/modeling/layers/deeplab.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Layers for DeepLabV3."""
-
-import tensorflow as tf
-
-
-class SpatialPyramidPooling(tf.keras.layers.Layer):
-  """Implements the Atrous Spatial Pyramid Pooling.
-
-  The layer runs several parallel branches on the same input (a 1x1
-  convolution, one dilated convolution per dilation rate and a pooling
-  branch), concatenates their outputs and projects the result with a final
-  1x1 convolution followed by dropout.
-
-  References:
-    [Rethinking Atrous Convolution for Semantic Image Segmentation](
-      https://arxiv.org/pdf/1706.05587.pdf)
-    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
-    Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
-  """
-
-  def __init__(
-      self,
-      output_channels,
-      dilation_rates,
-      pool_kernel_size=None,
-      use_sync_bn=False,
-      batchnorm_momentum=0.99,
-      batchnorm_epsilon=0.001,
-      activation='relu',
-      dropout=0.5,
-      kernel_initializer='glorot_uniform',
-      kernel_regularizer=None,
-      interpolation='bilinear',
-      use_depthwise_convolution=False,
-      **kwargs):
-    """Initializes `SpatialPyramidPooling`.
-
-    Args:
-      output_channels: Number of channels produced by SpatialPyramidPooling.
-      dilation_rates: A list of integers for parallel dilated conv.
-      pool_kernel_size: A list of integers or None. If None, global average
-        pooling is applied, otherwise an average pooling of pool_kernel_size
-        is applied.
-      use_sync_bn: A bool, whether or not to use sync batch normalization.
-      batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to
-        0.99.
-      batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults to
-        0.001.
-      activation: A `str` for type of activation to be used. Defaults to 'relu'.
-      dropout: A float for the dropout rate before output. Defaults to 0.5.
-      kernel_initializer: Kernel initializer for conv layers. Defaults to
-        `glorot_uniform`.
-      kernel_regularizer: Kernel regularizer for conv layers. Defaults to None.
-      interpolation: The interpolation method for upsampling. Defaults to
-        `bilinear`.
-      use_depthwise_convolution: Allows spatial pooling to be separable
-         depthwise convolutions. [Encoder-Decoder with Atrous Separable
-         Convolution for Semantic Image Segmentation](
-         https://arxiv.org/pdf/1802.02611.pdf)
-      **kwargs: Other keyword arguments for the layer.
-    """
-    super(SpatialPyramidPooling, self).__init__(**kwargs)
-
-    self.output_channels = output_channels
-    self.dilation_rates = dilation_rates
-    self.use_sync_bn = use_sync_bn
-    self.batchnorm_momentum = batchnorm_momentum
-    self.batchnorm_epsilon = batchnorm_epsilon
-    self.activation = activation
-    self.dropout = dropout
-    self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
-    self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
-    self.interpolation = interpolation
-    # Only rank-4 inputs are accepted.
-    self.input_spec = tf.keras.layers.InputSpec(ndim=4)
-    self.pool_kernel_size = pool_kernel_size
-    self.use_depthwise_convolution = use_depthwise_convolution
-
-  def build(self, input_shape):
-    """Creates the parallel ASPP branches and the output projection.
-
-    Args:
-      input_shape: The shape of the rank-4 input. Entries 1, 2 and 3 are read
-        as height, width and channels respectively.
-    """
-    height = input_shape[1]
-    width = input_shape[2]
-    channels = input_shape[3]
-
-    self.aspp_layers = []
-
-    if self.use_sync_bn:
-      bn_op = tf.keras.layers.experimental.SyncBatchNormalization
-    else:
-      bn_op = tf.keras.layers.BatchNormalization
-
-    if tf.keras.backend.image_data_format() == 'channels_last':
-      bn_axis = -1
-    else:
-      bn_axis = 1
-
-    # Branch 1: plain 1x1 convolution.
-    conv_sequential = tf.keras.Sequential([
-        tf.keras.layers.Conv2D(
-            filters=self.output_channels, kernel_size=(1, 1),
-            kernel_initializer=self.kernel_initializer,
-            kernel_regularizer=self.kernel_regularizer,
-            use_bias=False),
-        bn_op(
-            axis=bn_axis,
-            momentum=self.batchnorm_momentum,
-            epsilon=self.batchnorm_epsilon),
-        tf.keras.layers.Activation(self.activation)
-    ])
-    self.aspp_layers.append(conv_sequential)
-
-    # One dilated branch per dilation rate. In depthwise mode a 3x3 dilated
-    # depthwise convolution is followed by a 1x1 convolution instead of a
-    # single 3x3 dilated convolution.
-    for dilation_rate in self.dilation_rates:
-      leading_layers = []
-      kernel_size = (3, 3)
-      if self.use_depthwise_convolution:
-        leading_layers += [
-            tf.keras.layers.DepthwiseConv2D(
-                depth_multiplier=1, kernel_size=kernel_size,
-                padding='same', depthwise_regularizer=self.kernel_regularizer,
-                depthwise_initializer=self.kernel_initializer,
-                dilation_rate=dilation_rate, use_bias=False)
-        ]
-        kernel_size = (1, 1)
-      conv_sequential = tf.keras.Sequential(leading_layers + [
-          tf.keras.layers.Conv2D(
-              filters=self.output_channels, kernel_size=kernel_size,
-              padding='same', kernel_regularizer=self.kernel_regularizer,
-              kernel_initializer=self.kernel_initializer,
-              dilation_rate=dilation_rate, use_bias=False),
-          bn_op(axis=bn_axis, momentum=self.batchnorm_momentum,
-                epsilon=self.batchnorm_epsilon),
-          tf.keras.layers.Activation(self.activation)])
-      self.aspp_layers.append(conv_sequential)
-
-    # Pooling branch: global average pooling (reshaped back to a 1x1 map) or
-    # a fixed-size average pooling, depending on `pool_kernel_size`.
-    if self.pool_kernel_size is None:
-      pool_sequential = tf.keras.Sequential([
-          tf.keras.layers.GlobalAveragePooling2D(),
-          tf.keras.layers.Reshape((1, 1, channels))])
-    else:
-      pool_sequential = tf.keras.Sequential([
-          tf.keras.layers.AveragePooling2D(self.pool_kernel_size)])
-
-    # The pooled feature is resized back to the input height and width so it
-    # can be concatenated with the other branches.
-    pool_sequential.add(
-        tf.keras.Sequential([
-            tf.keras.layers.Conv2D(
-                filters=self.output_channels,
-                kernel_size=(1, 1),
-                kernel_initializer=self.kernel_initializer,
-                kernel_regularizer=self.kernel_regularizer,
-                use_bias=False),
-            bn_op(
-                axis=bn_axis,
-                momentum=self.batchnorm_momentum,
-                epsilon=self.batchnorm_epsilon),
-            tf.keras.layers.Activation(self.activation),
-            tf.keras.layers.experimental.preprocessing.Resizing(
-                height,
-                width,
-                interpolation=self.interpolation,
-                dtype=tf.float32)
-        ]))
-
-    self.aspp_layers.append(pool_sequential)
-
-    self.projection = tf.keras.Sequential([
-        tf.keras.layers.Conv2D(
-            filters=self.output_channels, kernel_size=(1, 1),
-            kernel_initializer=self.kernel_initializer,
-            kernel_regularizer=self.kernel_regularizer,
-            use_bias=False),
-        bn_op(
-            axis=bn_axis,
-            momentum=self.batchnorm_momentum,
-            epsilon=self.batchnorm_epsilon),
-        tf.keras.layers.Activation(self.activation),
-        tf.keras.layers.Dropout(rate=self.dropout)])
-
-  def call(self, inputs, training=None):
-    """Applies every ASPP branch, concatenates and projects the outputs.
-
-    Args:
-      inputs: The rank-4 input feature map.
-      training: Whether the layer runs in training mode. If None, the Keras
-        backend learning phase is used.
-
-    Returns:
-      The projected output of the concatenated branches.
-    """
-    if training is None:
-      training = tf.keras.backend.learning_phase()
-    result = []
-    for layer in self.aspp_layers:
-      # The pooling branch resizes in float32, so every branch output is cast
-      # back to the input dtype before concatenation.
-      result.append(tf.cast(layer(inputs, training=training), inputs.dtype))
-    result = tf.concat(result, axis=-1)
-    result = self.projection(result, training=training)
-    return result
-
-  def get_config(self):
-    """Returns the layer config, including every constructor argument.
-
-    The initializer and regularizer are serialized, and
-    `use_depthwise_convolution` is included so that a layer rebuilt with
-    `from_config` keeps the same branch structure.
-    """
-    config = {
-        'output_channels': self.output_channels,
-        'dilation_rates': self.dilation_rates,
-        'pool_kernel_size': self.pool_kernel_size,
-        'use_sync_bn': self.use_sync_bn,
-        'batchnorm_momentum': self.batchnorm_momentum,
-        'batchnorm_epsilon': self.batchnorm_epsilon,
-        'activation': self.activation,
-        'dropout': self.dropout,
-        'kernel_initializer': tf.keras.initializers.serialize(
-            self.kernel_initializer),
-        'kernel_regularizer': tf.keras.regularizers.serialize(
-            self.kernel_regularizer),
-        'interpolation': self.interpolation,
-        'use_depthwise_convolution': self.use_depthwise_convolution,
-    }
-    base_config = super(SpatialPyramidPooling, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
--- a/official/vision/beta/modeling/layers/deeplab_test.py
+++ b/official/vision/beta/modeling/layers/deeplab_test.py
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for ASPP."""
-
-import tensorflow as tf
-
-from tensorflow.python.keras import keras_parameterized
-from official.vision.beta.modeling.layers import deeplab
-
-
-@keras_parameterized.run_all_keras_modes
-class DeeplabTest(keras_parameterized.TestCase):
-  """Tests for `deeplab.SpatialPyramidPooling`."""
-
-  @keras_parameterized.parameterized.parameters(
-      (None,),
-      ([32, 32],),
-      )
-  def test_aspp(self, pool_kernel_size):
-    """Checks the output shape for global and fixed-size average pooling.
-
-    Args:
-      pool_kernel_size: None for global average pooling, or a list of
-        integers giving the average pooling kernel size.
-    """
-    inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32)
-    # Forward the parameterized value so both pooling branches are exercised.
-    layer = deeplab.SpatialPyramidPooling(output_channels=256,
-                                          dilation_rates=[6, 12, 18],
-                                          pool_kernel_size=pool_kernel_size)
-    output = layer(inputs)
-    self.assertAllEqual([None, 64, 64, 256], output.shape)
-
-  def test_aspp_invalid_shape(self):
-    """Checks that an input that is not rank 4 raises a `ValueError`."""
-    inputs = tf.keras.Input(shape=(64, 64), dtype=tf.float32)
-    layer = deeplab.SpatialPyramidPooling(output_channels=256,
-                                          dilation_rates=[6, 12, 18])
-    with self.assertRaises(ValueError):
-      _ = layer(inputs)
-
-  def test_config_with_custom_name(self):
-    """Checks that a custom layer name survives a config round-trip."""
-    layer = deeplab.SpatialPyramidPooling(256, [5], name='aspp')
-    config = layer.get_config()
-    layer_1 = deeplab.SpatialPyramidPooling.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-# Run the test cases in this module when it is executed as a script.
-if __name__ == '__main__':
-  tf.test.main()