Commit 460890ed authored by A. Unique TensorFlower's avatar A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 406888835
parent f2bc366e
@@ -41,6 +41,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
kernel_size: int = 3,
strides: int = 1,
use_bias: bool = False,
use_explicit_padding: bool = False,
activation: str = 'relu6',
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
@@ -60,6 +61,9 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
strides: An `int` of block stride. If greater than 1, this block will
ultimately downsample the input.
use_bias: If True, use bias in the convolution layer.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
activation: A `str` name of the activation function.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
@@ -79,6 +83,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
self._strides = strides
self._activation = activation
self._use_bias = use_bias
self._use_explicit_padding = use_explicit_padding
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
@@ -87,6 +92,10 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if use_explicit_padding and kernel_size > 1:
self._padding = 'valid'
else:
self._padding = 'same'
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
@@ -102,6 +111,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
'strides': self._strides,
'kernel_size': self._kernel_size,
'use_bias': self._use_bias,
'use_explicit_padding': self._use_explicit_padding,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
@@ -115,11 +125,14 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
if self._use_explicit_padding and self._kernel_size > 1:
padding_size = nn_layers.get_padding_for_kernel_size(self._kernel_size)
self._pad = tf.keras.layers.ZeroPadding2D(padding_size)
self._conv0 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=self._kernel_size,
strides=self._strides,
padding='same',
padding=self._padding,
use_bias=self._use_bias,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
@@ -135,6 +148,8 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
super(Conv2DBNBlock, self).build(input_shape)
def call(self, inputs, training=None):
if self._use_explicit_padding and self._kernel_size > 1:
inputs = self._pad(inputs)
x = self._conv0(inputs)
if self._use_normalization:
x = self._norm0(x)
......
@@ -69,6 +69,7 @@ class ResidualBlock(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_explicit_padding: bool = False,
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
@@ -97,6 +98,9 @@ class ResidualBlock(tf.keras.layers.Layer):
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
activation: A `str` name of the activation function.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
@@ -111,6 +115,7 @@ class ResidualBlock(tf.keras.layers.Layer):
self._use_projection = use_projection
self._se_ratio = se_ratio
self._resnetd_shortcut = resnetd_shortcut
self._use_explicit_padding = use_explicit_padding
self._use_sync_bn = use_sync_bn
self._activation = activation
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
@@ -147,11 +152,17 @@ class ResidualBlock(tf.keras.layers.Layer):
epsilon=self._norm_epsilon,
trainable=self._bn_trainable)
conv1_padding = 'same'
# Explicit padding is added here for CenterNet.
if self._use_explicit_padding:
self._pad = tf.keras.layers.ZeroPadding2D(padding=(1, 1))
conv1_padding = 'valid'
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
padding=conv1_padding,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
@@ -208,6 +219,7 @@ class ResidualBlock(tf.keras.layers.Layer):
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_explicit_padding': self._use_explicit_padding,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon,
@@ -222,6 +234,8 @@ class ResidualBlock(tf.keras.layers.Layer):
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
if self._use_explicit_padding:
inputs = self._pad(inputs)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
......
@@ -69,6 +69,17 @@ def round_filters(filters: int,
return int(new_filters)
def get_padding_for_kernel_size(kernel_size):
"""Compute padding size given kernel size."""
if kernel_size == 7:
return (3, 3)
elif kernel_size == 3:
return (1, 1)
else:
raise ValueError('Padding for kernel size {} not known.'.format(
kernel_size))
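As a sanity check (not part of this change), the padding sizes above make an explicit `ZeroPadding2D` followed by a VALID convolution preserve the spatial dimensions that SAME padding would produce. A minimal sketch; for strides greater than 1 the output shapes still match, though border values can differ because TF's SAME padding is asymmetric there:
```
import tensorflow as tf

x = tf.zeros([1, 512, 512, 3])

# SAME padding, as used when use_explicit_padding=False.
same_conv = tf.keras.layers.Conv2D(8, kernel_size=7, strides=2, padding='same')

# Explicit (3, 3) padding from get_padding_for_kernel_size(7), then VALID.
pad = tf.keras.layers.ZeroPadding2D(padding=(3, 3))
valid_conv = tf.keras.layers.Conv2D(8, kernel_size=7, strides=2, padding='valid')

assert same_conv(x).shape == valid_conv(pad(x)).shape  # both (1, 256, 256, 8)
```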
def hard_swish(x: tf.Tensor) -> tf.Tensor:
"""A Swish6/H-Swish activation function.
......
# CenterNet
[![Paper](http://img.shields.io/badge/Paper-arXiv.1904.07850-B3181B?logo=arXiv)](https://arxiv.org/abs/1904.07850)
CenterNet builds upon CornerNet, an anchor-free model for object detection.
Many other models, such as YOLO and RetinaNet, use anchor boxes, which are
predefined to be close to the aspect ratios and scales of the objects in the
training dataset. Anchor-based models do not predict the bounding boxes of
objects directly; instead, they predict location and size/shape refinements to
a predefined anchor box, and a detection generator then computes the final
confidences, positions, and sizes of the detections.
CornerNet eliminates the need for anchor boxes. A model such as RetinaNet needs
thousands of anchor boxes to cover the most common ground-truth boxes, which
adds complexity to the model, slows down training, and creates an imbalance
between positive and negative anchors. Instead, CornerNet predicts heatmaps for
the corners of each box and pools them together to obtain the final detection
boxes. CenterNet removes even more complexity by using the center instead of
the corners, so only one set of heatmaps (one heatmap per class) is needed to
predict the objects. CenterNet shows that this can be done without a
significant difference in accuracy.
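For intuition, the peak extraction that turns a center heatmap into candidate detections can be written as a small max-pooling trick. The sketch below is illustrative only, not the implementation used in this project; the pooling window corresponds to the `peak_extract_kernel_size` option in the configurations further down.
```
import tensorflow as tf

def extract_peaks(ct_heatmaps, kernel_size=3):
  """Keeps local maxima of a center heatmap and zeroes out everything else.

  ct_heatmaps: a [batch, height, width, num_classes] tensor of center scores.
  """
  local_max = tf.nn.max_pool2d(
      ct_heatmaps, ksize=kernel_size, strides=1, padding='SAME')
  is_peak = tf.cast(tf.equal(ct_heatmaps, local_max), ct_heatmaps.dtype)
  return ct_heatmaps * is_peak
```
The surviving peaks are then ranked by score, and the per-pixel offset and size predictions at each peak location yield the final boxes.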
## Environment setup
The code can be run on multiple GPUs or TPUs with different distribution
strategies. See the TensorFlow distributed training
[guide](https://www.tensorflow.org/guide/distributed_training) for an overview
of `tf.distribute`.
The code is compatible with TensorFlow 2.5+. See `requirements.txt` for all
prerequisites; you can install them with
`pip install -r ./official/requirements.txt`.
## Training
To train the model on COCO, try the following command:
```
python3 -m official.vision.beta.projects.centernet.train \
--mode=train_and_eval \
--experiment=centernet_hourglass_coco \
--model_dir={MODEL_DIR} \
--config_file={CONFIG_FILE}
```
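Individual config fields can also be overridden from the command line with the standard Model Garden `--params_override` flag (assuming the common training driver flags), for example `--params_override='task.train_data.global_batch_size=64'`.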
## Configurations
In the following table, we report the mAP measured on the `coco-val2017` set.
Backbone | Config name | mAP
:--------------- | :-----------------------------------------------| -------:
Hourglass-104 | `coco-centernet-hourglass-gpu.yaml` | 40.01
Hourglass-104 | `coco-centernet-hourglass-tpu.yaml` | 40.5
**Note:** `float16` (`bfloat16` for TPU) is used in the provided configurations.
## Cite
[Centernet](https://arxiv.org/abs/1904.07850):
```
@article{Zhou2019ObjectsAP,
title={Objects as Points},
author={Xingyi Zhou and Dequan Wang and Philipp Kr{\"a}henb{\"u}hl},
journal={ArXiv},
year={2019},
volume={abs/1904.07850}
}
```
[CornerNet](https://arxiv.org/abs/1808.01244):
```
@article{Law2019CornerNetDO,
title={CornerNet: Detecting Objects as Paired Keypoints},
author={Hei Law and J. Deng},
journal={International Journal of Computer Vision},
year={2019},
volume={128},
pages={642-656}
}
```
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration."""
# pylint: disable=unused-import
from official.common import registry_imports
from official.vision.beta.projects.centernet.configs import centernet
from official.vision.beta.projects.centernet.modeling import centernet_model
from official.vision.beta.projects.centernet.modeling.backbones import hourglass
from official.vision.beta.projects.centernet.tasks import centernet as centernet_task
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Backbones configurations."""
import dataclasses
from official.modeling import hyperparams
from official.vision.beta.configs import backbones
@dataclasses.dataclass
class Hourglass(hyperparams.Config):
"""Hourglass config."""
model_id: int = 52
input_channel_dims: int = 128
num_hourglasses: int = 2
initial_downsample: bool = True
activation: str = 'relu'
@dataclasses.dataclass
class Backbone(backbones.Backbone):
hourglass: Hourglass = Hourglass()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CenterNet configuration definition."""
import dataclasses
import os
from typing import List, Optional, Tuple
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.configs import backbones
TfExampleDecoderLabelMap = common.TfExampleDecoderLabelMap
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
regenerate_source_id: bool = False
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
type: Optional[str] = 'simple_decoder'
simple_decoder: TfExampleDecoder = TfExampleDecoder()
label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()
@dataclasses.dataclass
class Parser(hyperparams.Config):
"""Config for parser."""
bgr_ordering: bool = True
aug_rand_hflip: bool = True
aug_scale_min: float = 1.0
aug_scale_max: float = 1.0
aug_rand_saturation: bool = False
aug_rand_brightness: bool = False
aug_rand_hue: bool = False
aug_rand_contrast: bool = False
odapi_augmentation: bool = False
channel_means: Tuple[float, float, float] = dataclasses.field(
default_factory=lambda: (104.01362025, 114.03422265, 119.9165958))
channel_stds: Tuple[float, float, float] = dataclasses.field(
default_factory=lambda: (73.6027665, 69.89082075, 70.9150767))
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
"""Input config for training."""
input_path: str = ''
global_batch_size: int = 32
is_training: bool = True
dtype: str = 'float16'
decoder: DataDecoder = DataDecoder()
parser: Parser = Parser()
shuffle_buffer_size: int = 10000
file_type: str = 'tfrecord'
drop_remainder: bool = True
@dataclasses.dataclass
class DetectionLoss(hyperparams.Config):
object_center_weight: float = 1.0
offset_weight: float = 1.0
scale_weight: float = 0.1
@dataclasses.dataclass
class Losses(hyperparams.Config):
detection: DetectionLoss = DetectionLoss()
gaussian_iou: float = 0.7
class_offset: int = 1
@dataclasses.dataclass
class CenterNetHead(hyperparams.Config):
heatmap_bias: float = -2.19
input_levels: List[str] = dataclasses.field(
default_factory=lambda: ['2_0', '2'])
@dataclasses.dataclass
class CenterNetDetectionGenerator(hyperparams.Config):
max_detections: int = 100
peak_error: float = 1e-6
peak_extract_kernel_size: int = 3
class_offset: int = 1
use_nms: bool = False
nms_pre_thresh: float = 0.1
nms_thresh: float = 0.4
use_reduction_sum: bool = True
@dataclasses.dataclass
class CenterNetModel(hyperparams.Config):
"""Config for centernet model."""
num_classes: int = 90
max_num_instances: int = 128
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone(
type='hourglass', hourglass=backbones.Hourglass(model_id=52))
head: CenterNetHead = CenterNetHead()
# pylint: disable=line-too-long
detection_generator: CenterNetDetectionGenerator = CenterNetDetectionGenerator()
norm_activation: common.NormActivation = common.NormActivation(
norm_momentum=0.1, norm_epsilon=1e-5, use_sync_bn=True)
@dataclasses.dataclass
class CenterNetDetection(hyperparams.Config):
# use_centers is currently the only option implemented.
use_centers: bool = True
@dataclasses.dataclass
class CenterNetSubTasks(hyperparams.Config):
detection: CenterNetDetection = CenterNetDetection()
@dataclasses.dataclass
class CenterNetTask(cfg.TaskConfig):
"""Config for centernet task."""
model: CenterNetModel = CenterNetModel()
train_data: DataConfig = DataConfig(is_training=True)
validation_data: DataConfig = DataConfig(is_training=False)
subtasks: CenterNetSubTasks = CenterNetSubTasks()
losses: Losses = Losses()
gradient_clip_norm: float = 10.0
per_category_metrics: bool = False
weight_decay: float = 5e-4
# Load checkpoints
init_checkpoint: Optional[str] = None
init_checkpoint_modules: str = 'all'
annotation_file: Optional[str] = None
def get_output_length_dict(self):
task_outputs = {}
if self.subtasks.detection and self.subtasks.detection.use_centers:
task_outputs.update({
'ct_heatmaps': self.model.num_classes,
'ct_offset': 2,
'ct_size': 2
})
else:
raise ValueError('Detection with center points is the only option implemented.')
return task_outputs
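A brief usage sketch (not part of this file): the dictionary returned above is what gets passed as `task_outputs` when building the CenterNet head, mapping each output name to its channel depth.
```
task_config = CenterNetTask()
print(task_config.get_output_length_dict())
# With the default num_classes=90:
# {'ct_heatmaps': 90, 'ct_offset': 2, 'ct_size': 2}
```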
COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
@exp_factory.register_config_factory('centernet_hourglass_coco')
def centernet_hourglass_coco() -> cfg.ExperimentConfig:
"""COCO object detection with CenterNet."""
train_batch_size = 128
eval_batch_size = 8
steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
config = cfg.ExperimentConfig(
task=CenterNetTask(
annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
'instances_val2017.json'),
model=CenterNetModel(),
train_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
is_training=True,
global_batch_size=train_batch_size,
parser=Parser(),
shuffle_buffer_size=2),
validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size,
shuffle_buffer_size=2),
),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
train_steps=150 * steps_per_epoch,
validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'adam',
'adam': {
'epsilon': 1e-7
}
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.001,
'decay_steps': 150 * steps_per_epoch
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 2000,
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
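For reference, a minimal sketch of consuming the registered experiment; it assumes the CenterNet registry imports have been loaded so the factory above is registered:
```
from official.core import exp_factory
# Registers 'centernet_hourglass_coco', among others.
from official.vision.beta.projects.centernet.common import registry_imports  # pylint: disable=unused-import

config = exp_factory.get_exp_config('centernet_hourglass_coco')
config.task.train_data.global_batch_size = 64  # override a field
config.validate()  # enforces the restrictions declared above
```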
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for centernet."""
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.beta.projects.centernet.common import registry_imports # pylint: disable=unused-import
from official.vision.beta.projects.centernet.configs import centernet as exp_cfg
class CenterNetConfigTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(('centernet_hourglass_coco',))
def test_centernet_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, exp_cfg.CenterNetTask)
self.assertIsInstance(config.task.model, exp_cfg.CenterNetModel)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.task.train_data.is_training = None
with self.assertRaises(KeyError):
config.validate()
if __name__ == '__main__':
tf.test.main()
# COCO AP 40.01% for float16 precision is achieved with the configuration below.
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
loss_scale: 'dynamic'
num_gpus: 8
task:
model:
num_classes: 90
max_num_instances: 128
input_size: [512, 512, 3]
backbone:
type: hourglass
hourglass:
model_id: 52
num_hourglasses: 2
head:
heatmap_bias: -2.19
input_levels: ['2_0', '2']
detection_generator:
max_detections: 100
peak_error: 0.000001
peak_extract_kernel_size: 3
use_nms: false
nms_pre_thresh: 0.1
nms_thresh: 0.4
class_offset: 1
norm_activation:
norm_epsilon: 0.00001
norm_momentum: 0.1
use_sync_bn: true
losses:
detection:
offset_weight: 1.0
scale_weight: 0.1
gaussian_iou: 0.7
class_offset: 1
per_category_metrics: false
weight_decay: 0.0005
gradient_clip_norm: 10.0
annotation_file: 'coco/instances_val2017.json'
init_checkpoint: '/placer/prod/scratch/home/tf-model-garden-dev/vision/centernet/extremenet_hg104_512x512_coco17/2021-10-19'
init_checkpoint_modules: 'backbone'
train_data:
input_path: 'coco/train*'
drop_remainder: true
dtype: 'float16'
global_batch_size: 64
is_training: true
parser:
aug_rand_hflip: true
aug_scale_min: 0.6
aug_scale_max: 1.3
aug_rand_saturation: true
aug_rand_brightness: true
aug_rand_hue: true
aug_rand_contrast: true
odapi_augmentation: true
validation_data:
input_path: 'coco/val*'
drop_remainder: false
dtype: 'float16'
global_batch_size: 16
is_training: false
trainer:
train_steps: 280000
validation_steps: 312 # 5000 / 16
steps_per_loop: 1848 # 118287 / 64
validation_interval: 1848
summary_interval: 1848
checkpoint_interval: 1848
optimizer_config:
learning_rate:
type: 'cosine'
cosine:
initial_learning_rate: 0.0005
decay_steps: 280000
optimizer:
type: adam
adam:
epsilon: 0.0000001
warmup:
type: 'linear'
linear:
warmup_steps: 2000
# COCO AP 40.6% for bfloat16 precision is achieved with the configuration below.
# Expected COCO AP for float32 from OD API is 41.92 +/- 0.16.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 90
max_num_instances: 128
input_size: [512, 512, 3]
backbone:
type: hourglass
hourglass:
model_id: 52
num_hourglasses: 2
head:
heatmap_bias: -2.19
input_levels: ['2_0', '2']
detection_generator:
max_detections: 100
peak_error: 0.000001
peak_extract_kernel_size: 3
use_nms: false
nms_pre_thresh: 0.1
nms_thresh: 0.4
class_offset: 1
norm_activation:
norm_epsilon: 0.00001
norm_momentum: 0.1
use_sync_bn: true
losses:
detection:
offset_weight: 1.0
scale_weight: 0.1
gaussian_iou: 0.7
class_offset: 1
per_category_metrics: false
weight_decay: 0.0005
gradient_clip_norm: 10.0
annotation_file: 'coco/instances_val2017.json'
init_checkpoint: '/placer/prod/scratch/home/tf-model-garden-dev/vision/centernet/extremenet_hg104_512x512_coco17/2021-10-19'
init_checkpoint_modules: 'backbone'
train_data:
input_path: 'coco/train*'
drop_remainder: true
dtype: 'bfloat16'
global_batch_size: 128
is_training: true
parser:
aug_rand_hflip: true
aug_scale_min: 0.6
aug_scale_max: 1.3
aug_rand_saturation: true
aug_rand_brightness: true
aug_rand_hue: true
aug_rand_contrast: true
odapi_augmentation: true
validation_data:
input_path: 'coco/val*'
drop_remainder: false
dtype: 'bfloat16'
global_batch_size: 16
is_training: false
trainer:
train_steps: 140000
validation_steps: 78
steps_per_loop: 924 # 118287 / 128
validation_interval: 924
summary_interval: 924
checkpoint_interval: 924
optimizer_config:
learning_rate:
type: 'cosine'
cosine:
initial_learning_rate: 0.001
decay_steps: 140000
optimizer:
type: adam
adam:
epsilon: 0.0000001
warmup:
type: 'linear'
linear:
warmup_steps: 2000
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Centernet."""
from typing import Tuple
import tensorflow as tf
from official.vision.beta.dataloaders import parser
from official.vision.beta.dataloaders import utils
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops
from official.vision.beta.projects.centernet.ops import box_list
from official.vision.beta.projects.centernet.ops import box_list_ops
from official.vision.beta.projects.centernet.ops import preprocess_ops as cn_prep_ops
CHANNEL_MEANS = (104.01362025, 114.03422265, 119.9165958)
CHANNEL_STDS = (73.6027665, 69.89082075, 70.9150767)
class CenterNetParser(parser.Parser):
"""Parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_width: int = 512,
output_height: int = 512,
max_num_instances: int = 128,
bgr_ordering: bool = True,
aug_rand_hflip=True,
aug_scale_min=1.0,
aug_scale_max=1.0,
aug_rand_saturation=False,
aug_rand_brightness=False,
aug_rand_hue=False,
aug_rand_contrast=False,
odapi_augmentation=False,
channel_means: Tuple[float, float, float] = CHANNEL_MEANS,
channel_stds: Tuple[float, float, float] = CHANNEL_STDS,
dtype: str = 'float32'):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_width: A `Tensor` or `int` for width of output image.
output_height: A `Tensor` or `int` for height of output image.
max_num_instances: An `int` for the maximum number of instances
in an image.
bgr_ordering: `bool`, if set will change the channel ordering to be in the
[blue, green, red] order.
aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
aug_rand_saturation: `bool`, if True, augment training with random
saturation.
aug_rand_brightness: `bool`, if True, augment training with random
brightness.
aug_rand_hue: `bool`, if True, augment training with random hue.
aug_rand_contrast: `bool`, if True, augment training with random contrast.
odapi_augmentation: `bool`, if True, use the OD API preprocessing.
channel_means: A tuple of floats, denoting the mean of each channel
which will be subtracted from it.
channel_stds: A tuple of floats, denoting the standard deviation of each
channel. Each channel will be divided by its standard deviation value.
dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
Raises:
Exception: if datatype is not supported.
"""
self._output_width = output_width
self._output_height = output_height
self._max_num_instances = max_num_instances
self._bgr_ordering = bgr_ordering
self._channel_means = channel_means
self._channel_stds = channel_stds
if dtype == 'float16':
self._dtype = tf.float16
elif dtype == 'bfloat16':
self._dtype = tf.bfloat16
elif dtype == 'float32':
self._dtype = tf.float32
else:
raise Exception(
'Unsupported datatype used in parser. Only '
'{float16, bfloat16, float32} are supported.')
# Data augmentation.
self._aug_rand_hflip = aug_rand_hflip
self._aug_scale_min = aug_scale_min
self._aug_scale_max = aug_scale_max
self._aug_rand_saturation = aug_rand_saturation
self._aug_rand_brightness = aug_rand_brightness
self._aug_rand_hue = aug_rand_hue
self._aug_rand_contrast = aug_rand_contrast
self._odapi_augmentation = odapi_augmentation
def _build_label(self,
boxes,
classes,
image_info,
unpad_image_shape,
data):
# Sets up groundtruth data for evaluation.
groundtruths = {
'source_id': data['source_id'],
'height': data['height'],
'width': data['width'],
'num_detections': tf.shape(data['groundtruth_classes'])[0],
'boxes': box_ops.denormalize_boxes(
data['groundtruth_boxes'], tf.shape(input=data['image'])[0:2]),
'classes': data['groundtruth_classes'],
'areas': data['groundtruth_area'],
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
}
groundtruths['source_id'] = utils.process_source_id(
groundtruths['source_id'])
groundtruths = utils.pad_groundtruths_to_fixed_size(
groundtruths, self._max_num_instances)
labels = {
'boxes': preprocess_ops.clip_or_pad_to_fixed_size(
boxes, self._max_num_instances, -1),
'classes': preprocess_ops.clip_or_pad_to_fixed_size(
classes, self._max_num_instances, -1),
'image_info': image_info,
'unpad_image_shapes': unpad_image_shape,
'groundtruths': groundtruths
}
return labels
def _parse_train_data(self, data):
"""Generates images and labels that are usable for model training.
We use random flip, random scaling (between 0.6 and 1.3), cropping,
and color jittering as data augmentation.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image = tf.cast(data['image'], dtype=tf.float32)
boxes = data['groundtruth_boxes']
classes = data['groundtruth_classes']
image_shape = tf.shape(input=image)[0:2]
if self._aug_rand_hflip:
image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)
# Image augmentation
if not self._odapi_augmentation:
# Color and lighting jittering
if self._aug_rand_hue:
image = tf.image.random_hue(
image=image, max_delta=.02)
if self._aug_rand_contrast:
image = tf.image.random_contrast(
image=image, lower=0.8, upper=1.25)
if self._aug_rand_saturation:
image = tf.image.random_saturation(
image=image, lower=0.8, upper=1.25)
if self._aug_rand_brightness:
image = tf.image.random_brightness(
image=image, max_delta=.2)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_ops.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = preprocess_ops.resize_and_crop_image(
image,
[self._output_height, self._output_width],
padded_size=[self._output_height, self._output_width],
aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max)
unpad_image_shape = tf.cast(tf.shape(image), tf.float32)
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
else:
# Color and lighting jittering
if self._aug_rand_hue:
image = cn_prep_ops.random_adjust_hue(
image=image, max_delta=.02)
if self._aug_rand_contrast:
image = cn_prep_ops.random_adjust_contrast(
image=image, min_delta=0.8, max_delta=1.25)
if self._aug_rand_saturation:
image = cn_prep_ops.random_adjust_saturation(
image=image, min_delta=0.8, max_delta=1.25)
if self._aug_rand_brightness:
image = cn_prep_ops.random_adjust_brightness(
image=image, max_delta=.2)
sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
image=image,
boxes=boxes,
labels=classes,
scale_min=self._aug_scale_min,
scale_max=self._aug_scale_max)
image, unpad_image_shape = cn_prep_ops.resize_to_range(
image=sc_image,
min_dimension=self._output_width,
max_dimension=self._output_width,
pad_to_max_dimension=True)
preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)
im_box = tf.stack([
0.0,
0.0,
preprocessed_shape[0] / unpad_image_shape[0],
preprocessed_shape[1] / unpad_image_shape[1]
])
realigned_bboxes = box_list_ops.change_coordinate_frame(
boxlist=box_list.BoxList(sc_boxes),
window=im_box)
valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
realigned_bboxes.get())
boxes = box_list_ops.to_absolute_coordinates(
boxlist=box_list.BoxList(valid_boxes),
height=self._output_height,
width=self._output_width).get()
image_info = tf.stack([
tf.cast(image_shape, dtype=tf.float32),
tf.constant([self._output_height, self._output_width],
dtype=tf.float32),
tf.cast(tf.shape(sc_image)[0:2] / image_shape, dtype=tf.float32),
tf.constant([0., 0.])
])
# Filters out ground truth boxes that are all zeros.
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
labels = self._build_label(
unpad_image_shape=unpad_image_shape,
boxes=boxes,
classes=classes,
image_info=image_info,
data=data)
if self._bgr_ordering:
red, green, blue = tf.unstack(image, num=3, axis=2)
image = tf.stack([blue, green, red], axis=2)
image = preprocess_ops.normalize_image(
image=image,
offset=self._channel_means,
scale=self._channel_stds)
image = tf.cast(image, self._dtype)
return image, labels
def _parse_eval_data(self, data):
"""Generates images and labels that are usable for model evaluation.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image = tf.cast(data['image'], dtype=tf.float32)
boxes = data['groundtruth_boxes']
classes = data['groundtruth_classes']
image_shape = tf.shape(input=image)[0:2]
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_ops.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = preprocess_ops.resize_and_crop_image(
image,
[self._output_height, self._output_width],
padded_size=[self._output_height, self._output_width],
aug_scale_min=1.0,
aug_scale_max=1.0)
unpad_image_shape = tf.cast(tf.shape(image), tf.float32)
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
labels = self._build_label(
unpad_image_shape=unpad_image_shape,
boxes=boxes,
classes=classes,
image_info=image_info,
data=data)
if self._bgr_ordering:
red, green, blue = tf.unstack(image, num=3, axis=2)
image = tf.stack([blue, green, red], axis=2)
image = preprocess_ops.normalize_image(
image=image,
offset=self._channel_means,
scale=self._channel_stds)
image = tf.cast(image, self._dtype)
return image, labels
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses for centernet model."""
import tensorflow as tf
class PenaltyReducedLogisticFocalLoss(object):
"""Penalty-reduced pixelwise logistic regression with focal loss."""
def __init__(self, alpha=2.0, beta=4.0, sigmoid_clip_value=1e-4):
"""Constructor.
The loss is defined in Equation (1) of the Objects as Points[1] paper.
Although the loss is defined per-pixel in the output space, this class
assumes that each pixel is an anchor to be compatible with the base class.
[1]: https://arxiv.org/abs/1904.07850
Args:
alpha: Focussing parameter of the focal loss. Increasing this will
decrease the loss contribution of the well classified examples.
beta: The local penalty reduction factor. Increasing this will decrease
the contribution of loss due to negative pixels near the keypoint.
sigmoid_clip_value: The sigmoid operation used internally will be clipped
to the range [sigmoid_clip_value, 1 - sigmoid_clip_value].
"""
self._alpha = alpha
self._beta = beta
self._sigmoid_clip_value = sigmoid_clip_value
super(PenaltyReducedLogisticFocalLoss, self).__init__()
def __call__(self, prediction_tensor, target_tensor, weights=1.0):
"""Compute loss function.
In all input tensors, `num_anchors` is the total number of pixels in the
output space.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted unscaled logits for each class.
The function will compute sigmoid on this tensor internally.
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing a tensor with the 'splatted' keypoints,
possibly using a gaussian kernel. This function assumes that
the target is bounded between [0, 1].
weights: a float tensor of shape either [batch_size, num_anchors,
num_classes] or [batch_size, num_anchors, 1]. If the shape is
[batch_size, num_anchors, 1], all the classes are equally weighted.
Returns:
loss: a float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
with tf.name_scope('prlf_loss'):
is_present_tensor = tf.math.equal(target_tensor, 1.0)
prediction_tensor = tf.clip_by_value(tf.sigmoid(prediction_tensor),
self._sigmoid_clip_value,
1 - self._sigmoid_clip_value)
positive_loss = (tf.math.pow((1 - prediction_tensor), self._alpha) *
tf.math.log(prediction_tensor))
negative_loss = (tf.math.pow((1 - target_tensor), self._beta) *
tf.math.pow(prediction_tensor, self._alpha) *
tf.math.log(1 - prediction_tensor))
loss = -tf.where(is_present_tensor, positive_loss, negative_loss)
return loss * weights
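For reference, a toy invocation (not part of the commit). Per pixel, the code above computes `-(1 - p)^alpha * log(p)` where the target equals 1, and `-(1 - y)^beta * p^alpha * log(1 - p)` elsewhere, with `p` the clipped sigmoid of the logits; this matches Eq. (1) of the paper up to the normalization applied by the caller.
```
loss_fn = PenaltyReducedLogisticFocalLoss(alpha=2.0, beta=4.0)
# [batch, num_pixels, num_classes]; zero logits give p = 0.5 everywhere.
logits = tf.zeros([2, 128 * 128, 90])
targets = tf.zeros([2, 128 * 128, 90])  # gaussian-splatted keypoints in [0, 1]
per_pixel_loss = loss_fn(logits, targets)  # shape [2, 16384, 90]
```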
class L1LocalizationLoss(object):
"""L1 loss or absolute difference."""
def __call__(self, prediction_tensor, target_tensor, weights=1.0):
"""Compute loss function.
When used in a per-pixel manner, each pixel should be given as an anchor.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors]
representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors]
representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
with tf.name_scope('l1l_loss'):
return tf.compat.v1.losses.absolute_difference(
labels=target_tensor,
predictions=prediction_tensor,
weights=weights,
reduction=tf.losses.Reduction.NONE
)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for losses of centernet model."""
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.centernet.losses import centernet_losses
LOG_2 = np.log(2)
LOG_3 = np.log(3)
class L1LocalizationLossTest(tf.test.TestCase):
def test_returns_correct_loss(self):
def graph_fn():
loss = centernet_losses.L1LocalizationLoss()
pred = [[0.1, 0.2], [0.7, 0.5]]
target = [[0.9, 1.0], [0.1, 0.4]]
weights = [[1.0, 0.0], [1.0, 1.0]]
return loss(pred, target, weights=weights)
computed_value = graph_fn()
self.assertAllClose(computed_value, [[0.8, 0.0], [0.6, 0.1]], rtol=1e-6)
class PenaltyReducedLogisticFocalLossTest(tf.test.TestCase):
"""Testing loss function."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._prediction = np.array([
# First batch
[[1 / 2, 1 / 4, 3 / 4],
[3 / 4, 1 / 3, 1 / 3]],
# Second Batch
[[0.0, 1.0, 1 / 2],
[3 / 4, 2 / 3, 1 / 3]]], np.float32)
self._prediction = np.log(self._prediction / (1 - self._prediction))
self._target = np.array([
# First batch
[[1.0, 0.91, 1.0],
[0.36, 0.84, 1.0]],
# Second Batch
[[0.01, 1.0, 0.75],
[0.96, 1.0, 1.0]]], np.float32)
def test_returns_correct_loss(self):
def graph_fn(prediction, target):
weights = tf.constant([
[[1.0], [1.0]],
[[1.0], [1.0]],
])
loss = centernet_losses.PenaltyReducedLogisticFocalLoss(
alpha=2.0, beta=0.5)
computed_value = loss(prediction, target, weights=weights)
return computed_value
computed_value = graph_fn(self._prediction, self._target)
expected_value = np.array([
# First batch
[[1 / 4 * LOG_2,
0.3 * 0.0625 * (2 * LOG_2 - LOG_3),
1 / 16 * (2 * LOG_2 - LOG_3)],
[0.8 * 9 / 16 * 2 * LOG_2,
0.4 * 1 / 9 * (LOG_3 - LOG_2),
4 / 9 * LOG_3]],
# Second Batch
[[0.0,
0.0,
1 / 2 * 1 / 4 * LOG_2],
[0.2 * 9 / 16 * 2 * LOG_2,
1 / 9 * (LOG_3 - LOG_2),
4 / 9 * LOG_3]]])
self.assertAllClose(expected_value, computed_value, rtol=1e-3, atol=1e-3)
def test_returns_correct_loss_weighted(self):
def graph_fn(prediction, target):
weights = tf.constant([
[[1.0, 0.0, 1.0], [0.0, 0.0, 1.0]],
[[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]],
])
loss = centernet_losses.PenaltyReducedLogisticFocalLoss(
alpha=2.0, beta=0.5)
computed_value = loss(prediction, target, weights=weights)
return computed_value
computed_value = graph_fn(self._prediction, self._target)
expected_value = np.array([
# First batch
[[1 / 4 * LOG_2,
0.0,
1 / 16 * (2 * LOG_2 - LOG_3)],
[0.0,
0.0,
4 / 9 * LOG_3]],
# Second Batch
[[0.0,
0.0,
1 / 2 * 1 / 4 * LOG_2],
[0.0,
0.0,
0.0]]])
self.assertAllClose(expected_value, computed_value, rtol=1e-3, atol=1e-3)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build Hourglass backbone."""
from typing import Optional
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.beta.modeling.backbones import factory
from official.vision.beta.modeling.backbones import mobilenet
from official.vision.beta.modeling.layers import nn_blocks
from official.vision.beta.projects.centernet.modeling.layers import cn_nn_blocks
HOURGLASS_SPECS = {
10: {
'blocks_per_stage': [1, 1],
'channel_dims_per_stage': [2, 2]
},
20: {
'blocks_per_stage': [1, 2, 2],
'channel_dims_per_stage': [2, 2, 3]
},
32: {
'blocks_per_stage': [2, 2, 2, 2],
'channel_dims_per_stage': [2, 2, 3, 3]
},
52: {
'blocks_per_stage': [2, 2, 2, 2, 2, 4],
'channel_dims_per_stage': [2, 2, 3, 3, 3, 4]
},
100: {
'blocks_per_stage': [4, 4, 4, 4, 4, 8],
'channel_dims_per_stage': [2, 2, 3, 3, 3, 4]
},
}
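The multipliers above are scaled by `input_channel_dims` when the model is built (see the constructor below). A quick sketch for the hourglass-52 spec with the default `input_channel_dims` of 128:
```
spec = HOURGLASS_SPECS[52]
channel_dims = [m * 128 for m in spec['channel_dims_per_stage']]
# channel_dims == [256, 256, 384, 384, 384, 512]
```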
@tf.keras.utils.register_keras_serializable(package='centernet')
class Hourglass(tf.keras.Model):
"""CenterNet Hourglass backbone."""
def __init__(
self,
model_id: int,
input_channel_dims: int,
input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
num_hourglasses: int = 1,
initial_downsample: bool = True,
activation: str = 'relu',
use_sync_bn: bool = True,
norm_momentum=0.1,
norm_epsilon=1e-5,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initialize Hourglass backbone.
Args:
model_id: An `int` of the scale of Hourglass backbone model.
input_channel_dims: `int`, number of filters used to downsample the
input image.
input_specs: A `tf.keras.layers.InputSpec` of specs of the input tensor.
num_hourglasses: `int`, number of hourglass blocks in the backbone. For
example, hourglass-104 has two hourglass-52 modules.
initial_downsample: `bool`, whether or not to downsample the input.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: `float`, momentum for the batch normalization layers.
norm_epsilon: `float`, epsilon for the batch normalization layers.
kernel_initializer: A `str` for kernel initializer of conv layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self._input_channel_dims = input_channel_dims
self._model_id = model_id
self._num_hourglasses = num_hourglasses
self._initial_downsample = initial_downsample
self._activation = activation
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
specs = HOURGLASS_SPECS[model_id]
self._blocks_per_stage = specs['blocks_per_stage']
self._channel_dims_per_stage = [item * self._input_channel_dims
for item in specs['channel_dims_per_stage']]
inputs = tf.keras.layers.Input(shape=input_specs.shape[1:])
inp_filters = self._channel_dims_per_stage[0]
# Downsample the input
if initial_downsample:
prelayer_kernel_size = 7
prelayer_strides = 2
else:
prelayer_kernel_size = 3
prelayer_strides = 1
x_downsampled = mobilenet.Conv2DBNBlock(
filters=self._input_channel_dims,
kernel_size=prelayer_kernel_size,
strides=prelayer_strides,
use_explicit_padding=True,
activation=self._activation,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(inputs)
x_downsampled = nn_blocks.ResidualBlock(
filters=inp_filters,
use_projection=True,
use_explicit_padding=True,
strides=prelayer_strides,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(x_downsampled)
all_heatmaps = {}
for i in range(num_hourglasses):
# Create an hourglass stack
x_hg = cn_nn_blocks.HourglassBlock(
channel_dims_per_stage=self._channel_dims_per_stage,
blocks_per_stage=self._blocks_per_stage,
)(x_downsampled)
x_hg = mobilenet.Conv2DBNBlock(
filters=inp_filters,
kernel_size=3,
strides=1,
use_explicit_padding=True,
activation=self._activation,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_hg)
# Given the two downsampling blocks above, the starting level is set to 2.
# To stay compatible with the other backbone implementations, the
# output of the hourglass backbone is organized as
# '2' -> the last layer of output
# '2_0' -> the first layer of output
# ......
# '2_{num_hourglasses-2}' -> the second to last layer of output
if i < num_hourglasses - 1:
all_heatmaps['2_{}'.format(i)] = x_hg
else:
all_heatmaps['2'] = x_hg
# Intermediate conv and residual layers between hourglasses
if i < num_hourglasses - 1:
inter_hg_conv1 = mobilenet.Conv2DBNBlock(
filters=inp_filters,
kernel_size=1,
strides=1,
activation='identity',
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_downsampled)
inter_hg_conv2 = mobilenet.Conv2DBNBlock(
filters=inp_filters,
kernel_size=1,
strides=1,
activation='identity',
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_hg)
x_downsampled = tf.keras.layers.Add()([inter_hg_conv1, inter_hg_conv2])
x_downsampled = tf.keras.layers.ReLU()(x_downsampled)
x_downsampled = nn_blocks.ResidualBlock(
filters=inp_filters,
use_projection=False,
use_explicit_padding=True,
strides=1,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_downsampled)
self._output_specs = {l: all_heatmaps[l].get_shape() for l in all_heatmaps}
super().__init__(inputs=inputs, outputs=all_heatmaps, **kwargs)
def get_config(self):
config = {
'model_id': self._model_id,
'input_channel_dims': self._input_channel_dims,
'num_hourglasses': self._num_hourglasses,
'initial_downsample': self._initial_downsample,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
config.update(super(Hourglass, self).get_config())
return config
@property
def num_hourglasses(self):
return self._num_hourglasses
@property
def output_specs(self):
return self._output_specs
@factory.register_backbone_builder('hourglass')
def build_hourglass(
input_specs: tf.keras.layers.InputSpec,
backbone_config: hyperparams.Config,
norm_activation_config: hyperparams.Config,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
"""Builds Hourglass backbone from a configuration."""
backbone_type = backbone_config.type
backbone_cfg = backbone_config.get()
assert backbone_type == 'hourglass', (f'Inconsistent backbone type '
f'{backbone_type}')
return Hourglass(
model_id=backbone_cfg.model_id,
input_channel_dims=backbone_cfg.input_channel_dims,
num_hourglasses=backbone_cfg.num_hourglasses,
input_specs=input_specs,
initial_downsample=backbone_cfg.initial_downsample,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer,
)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for hourglass module."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.common import registry_imports # pylint: disable=unused-import
from official.vision.beta.projects.centernet.configs import backbones
from official.vision.beta.projects.centernet.modeling.backbones import hourglass
class HourglassTest(tf.test.TestCase, parameterized.TestCase):
def test_hourglass(self):
backbone = hourglass.build_hourglass(
input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
backbone_config=backbones.Backbone(type='hourglass'),
norm_activation_config=common.NormActivation(use_sync_bn=True)
)
inputs = np.zeros((2, 512, 512, 3), dtype=np.float32)
outputs = backbone(inputs)
self.assertEqual(outputs['2_0'].shape, (2, 128, 128, 256))
self.assertEqual(outputs['2'].shape, (2, 128, 128, 256))
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Centernet detection models."""
from typing import Mapping, Union, Any
import tensorflow as tf
class CenterNetModel(tf.keras.Model):
"""CenterNet Model."""
def __init__(self,
backbone: tf.keras.Model,
head: tf.keras.Model,
detection_generator: tf.keras.layers.Layer,
**kwargs):
"""CenterNet Model.
Args:
backbone: a backbone network.
head: a projection head for centernet.
detection_generator: a detection generator for centernet.
**kwargs: keyword arguments to be passed.
"""
super(CenterNetModel, self).__init__(**kwargs)
# model components
self._backbone = backbone
self._detection_generator = detection_generator
self._head = head
def call(self,
inputs: tf.Tensor,
training: bool = None,
**kwargs) -> Mapping[str, tf.Tensor]:
features = self._backbone(inputs)
raw_outputs = self._head(features)
model_outputs = {'raw_output': raw_outputs}
if not training:
predictions = self._detection_generator(raw_outputs)
model_outputs.update(predictions)
return model_outputs
@property
def checkpoint_items(
self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
"""Returns a dictionary of items to be additionally checkpointed."""
items = dict(backbone=self.backbone, head=self.head)
return items
@property
def backbone(self):
return self._backbone
@property
def detection_generator(self):
return self._detection_generator
@property
def head(self):
return self._head
def get_config(self) -> Mapping[str, Any]:
config_dict = {
'backbone': self._backbone,
'head': self._head,
'detection_generator': self._detection_generator,
}
return config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for centernet detection model."""
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.configs import backbones
from official.vision.beta.projects.centernet.modeling import centernet_model
from official.vision.beta.projects.centernet.modeling.backbones import hourglass
from official.vision.beta.projects.centernet.modeling.heads import centernet_head
from official.vision.beta.projects.centernet.modeling.layers import detection_generator
class CenterNetTest(parameterized.TestCase, tf.test.TestCase):
def testBuildCenterNet(self):
backbone = hourglass.build_hourglass(
input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
backbone_config=backbones.Backbone(type='hourglass'),
norm_activation_config=common.NormActivation(use_sync_bn=True)
)
task_config = {
'ct_heatmaps': 90,
'ct_offset': 2,
'ct_size': 2,
}
input_levels = ['2_0', '2']
head = centernet_head.CenterNetHead(
task_outputs=task_config,
input_specs=backbone.output_specs,
input_levels=input_levels)
detection_ge = detection_generator.CenterNetDetectionGenerator()
model = centernet_model.CenterNetModel(
backbone=backbone,
head=head,
detection_generator=detection_ge
)
outputs = model(tf.zeros((5, 512, 512, 3)))
self.assertLen(outputs['raw_output'], 3)
self.assertLen(outputs['raw_output']['ct_heatmaps'], 2)
self.assertLen(outputs['raw_output']['ct_offset'], 2)
self.assertLen(outputs['raw_output']['ct_size'], 2)
self.assertEqual(outputs['raw_output']['ct_heatmaps'][0].shape,
(5, 128, 128, 90))
self.assertEqual(outputs['raw_output']['ct_offset'][0].shape,
(5, 128, 128, 2))
self.assertEqual(outputs['raw_output']['ct_size'][0].shape,
(5, 128, 128, 2))
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of head for CenterNet."""
from typing import Any, Mapping, Dict, List
import tensorflow as tf
from official.vision.beta.projects.centernet.modeling.layers import cn_nn_blocks
@tf.keras.utils.register_keras_serializable(package='centernet')
class CenterNetHead(tf.keras.Model):
"""CenterNet Head."""
def __init__(self,
input_specs: Dict[str, tf.TensorShape],
task_outputs: Mapping[str, int],
input_levels: List[str],
heatmap_bias: float = -2.19,
**kwargs):
"""CenterNet Head Initialization.
Args:
input_specs: A `dict` of input specifications.
task_outputs: A `dict`, with key-value pairs denoting the names of the
outputs and the desired channel depth of each output.
input_levels: list of str representing the levels used as inputs to the
CenterNetHead from the backbone. For example, ['2_0', '2'] should be
set for hourglass-104, which has two hourglass-52 modules, since the
output of hourglass backbones is organized as:
'2' -> the last layer of output
'2_0' -> the first layer of output
......
'2_{num_hourglasses-2}' -> the second to last layer of output.
heatmap_bias: `float`, constant value to initialize the convolution layer
bias vector if it is responsible for generating a heatmap (not for
regressed predictions).
**kwargs: Additional keyword arguments to be passed.
Returns:
dictionary where the key-value pairs denote the names of the outputs
and the respective output tensors.
"""
assert input_levels, f'Please specify input levels: {input_levels}'
self._input_specs = input_specs
self._task_outputs = task_outputs
self._input_levels = input_levels
self._heatmap_bias = heatmap_bias
self._num_inputs = len(input_levels)
input_levels = sorted(self._input_specs.keys())
inputs = {level: tf.keras.layers.Input(shape=self._input_specs[level][1:])
for level in input_levels}
outputs = {}
for key in self._task_outputs:
# pylint: disable=g-complex-comprehension
outputs[key] = [
cn_nn_blocks.CenterNetHeadConv(
output_filters=self._task_outputs[key],
bias_init=self._heatmap_bias if 'heatmaps' in key else 0,
name=key + str(i),
)(inputs[i])
for i in input_levels
]
self._output_specs = {
key: [value[i].get_shape() for i in range(self._num_inputs)]
for key, value in outputs.items()
}
super().__init__(inputs=inputs, outputs=outputs,
name='CenterNetHead', **kwargs)
def get_config(self) -> Mapping[str, Any]:
config = {
'input_specs': self._input_specs,
'task_outputs': self._task_outputs,
'heatmap_bias': self._heatmap_bias,
'input_levels': self._input_levels,
}
base_config = super(CenterNetHead, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@property
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Centernet Head."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.centernet.modeling.heads import centernet_head
class CenterNetHeadTest(tf.test.TestCase, parameterized.TestCase):
def test_decoder_shape(self):
task_config = {
'ct_heatmaps': 90,
'ct_offset': 2,
'ct_size': 2,
}
input_specs = {
'2_0': tf.keras.layers.InputSpec(shape=(None, 128, 128, 256)).shape,
'2': tf.keras.layers.InputSpec(shape=(None, 128, 128, 256)).shape,
}
input_levels = ['2', '2_0']
head = centernet_head.CenterNetHead(
task_outputs=task_config,
input_specs=input_specs,
input_levels=input_levels)
config = head.get_config()
self.assertEqual(config['heatmap_bias'], -2.19)
# Output shape tests
outputs = head([np.zeros((2, 128, 128, 256), dtype=np.float32),
np.zeros((2, 128, 128, 256), dtype=np.float32)])
self.assertLen(outputs, 3)
self.assertEqual(outputs['ct_heatmaps'][0].shape, (2, 128, 128, 90))
self.assertEqual(outputs['ct_offset'][0].shape, (2, 128, 128, 2))
self.assertEqual(outputs['ct_size'][0].shape, (2, 128, 128, 2))
# Weight initialization tests
hm_bias_vector = np.asarray(head.layers[2].weights[-1])
off_bias_vector = np.asarray(head.layers[4].weights[-1])
size_bias_vector = np.asarray(head.layers[6].weights[-1])
self.assertArrayNear(hm_bias_vector,
np.repeat(-2.19, repeats=90), err=1.00e-6)
self.assertArrayNear(off_bias_vector,
np.repeat(0, repeats=2), err=1.00e-6)
self.assertArrayNear(size_bias_vector,
np.repeat(0, repeats=2), err=1.00e-6)
if __name__ == '__main__':
tf.test.main()