Unverified Commit 92221745 authored by Srihari Humbarwadi, committed by GitHub

Merge pull request #14 from srihari-humbarwadi/panoptic-deeplab-modeling

panoptic deeplab modelling
parents 229e43e8 cdd61f61
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic Deeplab configuration definition."""
import dataclasses
from typing import List, Tuple, Union
from official.modeling import hyperparams
from official.vision.beta.configs import common
from official.vision.beta.configs import backbones
from official.vision.beta.configs import decoders
_COCO_INPUT_PATH_BASE = 'coco/tfrecords'
_COCO_TRAIN_EXAMPLES = 118287
_COCO_VAL_EXAMPLES = 5000
@dataclasses.dataclass
class PanopticDeeplabHead(hyperparams.Config):
"""Panoptic Deeplab head config."""
level: int = 3
num_convs: int = 2
num_filters: int = 256
kernel_size: int = 5
use_depthwise_convolution: bool = False
upsample_factor: int = 1
low_level: Union[List[int], Tuple[int]] = (3, 2)
low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32)
@dataclasses.dataclass
class SemanticHead(PanopticDeeplabHead):
"""Semantic head config."""
prediction_kernel_size: int = 1
@dataclasses.dataclass
class InstanceHead(PanopticDeeplabHead):
"""Instance head config."""
prediction_kernel_size: int = 1
@dataclasses.dataclass
class PanopticDeeplabPostProcessor(hyperparams.Config):
"""Panoptic Deeplab PostProcessing config."""
center_score_threshold: float = 0.1
thing_class_ids: List[int] = dataclasses.field(default_factory=list)
label_divisor: int = 256 * 256 * 256
stuff_area_limit: int = 4096
ignore_label: int = 0
nms_kernel: int = 41
keep_k_centers: int = 400
@dataclasses.dataclass
class PanopticDeeplab(hyperparams.Config):
"""Panoptic Deeplab model config."""
num_classes: int = 0
input_size: List[int] = dataclasses.field(default_factory=list)
min_level: int = 3
max_level: int = 6
norm_activation: common.NormActivation = common.NormActivation()
backbone: backbones.Backbone = backbones.Backbone(
type='resnet', resnet=backbones.ResNet())
decoder: decoders.Decoder = decoders.Decoder(type='aspp')
semantic_head: SemanticHead = SemanticHead()
instance_head: InstanceHead = InstanceHead()
shared_decoder: bool = False
post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor()
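As a usage sketch (not part of this change), the config above might be instantiated roughly as follows; the class count, input size, backbone depth, and thing ids are illustrative assumptions:
# Illustrative sketch only; all concrete values here are assumptions.
model_config = PanopticDeeplab(
    num_classes=134,  # assumed number of semantic classes
    input_size=[640, 640, 3],
    backbone=backbones.Backbone(type='resnet', resnet=backbones.ResNet(model_id=50)),
    decoder=decoders.Decoder(type='aspp'),
    semantic_head=SemanticHead(level=3, low_level=(3, 2)),
    instance_head=InstanceHead(level=3, low_level=(3, 2)),
    shared_decoder=False,
    post_processor=PanopticDeeplabPostProcessor(
        thing_class_ids=list(range(1, 81))))  # assumed 'thing' class ids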
@@ -16,10 +16,15 @@
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
from official.vision.modeling import backbones
from official.vision.modeling.decoders import factory as decoder_factory
from official.vision.modeling.heads import segmentation_heads
@@ -142,3 +147,97 @@ def build_panoptic_maskrcnn(
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model
def build_panoptic_deeplab(
input_specs: tf.keras.layers.InputSpec,
model_config: panoptic_deeplab_cfg.PanopticDeeplab,
l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: # pytype: disable=annotation-type-mismatch # typed-keras
"""Builds Panoptic Deeplab model.
Args:
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
model_config: Config instance for the panoptic deeplab model.
l2_regularizer: Optional `tf.keras.regularizers.Regularizer`, if specified,
the model is built with the provided regularization layer.
Returns:
tf.keras.Model for the panoptic segmentation model.
"""
norm_activation_config = model_config.norm_activation
backbone = backbones.factory.build_backbone(
input_specs=input_specs,
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
semantic_decoder = decoder_factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
if model_config.shared_decoder:
instance_decoder = None
else:
# TODO(srihari-humbarwadi): decouple semantic and
# instance decoder types
instance_decoder = decoder_factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
semantic_head_config = model_config.semantic_head
instance_head_config = model_config.instance_head
semantic_head = panoptic_deeplab_heads.SemanticHead(
num_classes=model_config.num_classes,
level=semantic_head_config.level,
num_convs=semantic_head_config.num_convs,
kernel_size=semantic_head_config.kernel_size,
prediction_kernel_size=semantic_head_config.prediction_kernel_size,
num_filters=semantic_head_config.num_filters,
use_depthwise_convolution=semantic_head_config.use_depthwise_convolution,
upsample_factor=semantic_head_config.upsample_factor,
low_level=semantic_head_config.low_level,
low_level_num_filters=semantic_head_config.low_level_num_filters,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
instance_head = panoptic_deeplab_heads.InstanceHead(
level=instance_head_config.level,
num_convs=instance_head_config.num_convs,
kernel_size=instance_head_config.kernel_size,
prediction_kernel_size=instance_head_config.prediction_kernel_size,
num_filters=instance_head_config.num_filters,
use_depthwise_convolution=instance_head_config.use_depthwise_convolution,
upsample_factor=instance_head_config.upsample_factor,
low_level=instance_head_config.low_level,
low_level_num_filters=instance_head_config.low_level_num_filters,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
post_processing_config = model_config.post_processor
post_processor = panoptic_deeplab_merge.PostProcessor(
center_score_threshold=post_processing_config.center_score_threshold,
thing_class_ids=post_processing_config.thing_class_ids,
label_divisor=post_processing_config.label_divisor,
stuff_area_limit=post_processing_config.stuff_area_limit,
ignore_label=post_processing_config.ignore_label,
nms_kernel=post_processing_config.nms_kernel,
keep_k_centers=post_processing_config.keep_k_centers)
model = panoptic_deeplab_model.PanopticDeeplabModel(
backbone=backbone,
semantic_decoder=semantic_decoder,
instance_decoder=instance_decoder,
semantic_head=semantic_head,
instance_head=instance_head,
post_processor=post_processor)
return model
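A minimal, hedged sketch of calling this builder (it mirrors the test added below; the input size and regularizer strength are assumptions):
input_specs = tf.keras.layers.InputSpec(shape=[None, 640, 640, 3])
model_config = panoptic_deeplab_cfg.PanopticDeeplab(
    num_classes=10, input_size=[640, 640])
model = build_panoptic_deeplab(
    input_specs=input_specs,
    model_config=model_config,
    l2_regularizer=tf.keras.regularizers.l2(1e-4))  # assumed weight decay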
@@ -17,8 +17,10 @@
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.configs import backbones
from official.vision.configs import decoders
@@ -62,5 +64,47 @@ class PanopticMaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase):
        model_config=model_config,
        l2_regularizer=l2_regularizer)
class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase):
@combinations.generate(
combinations.combine(
input_size=[(640, 640), (512, 512)],
backbone_type=['resnet', 'dilated_resnet'],
decoder_type=['aspp', 'fpn'],
level=[2, 3, 4],
low_level=[(4, 3), (3, 2)],
shared_decoder=[True, False]))
def test_builder(self, input_size, backbone_type, level,
low_level, decoder_type, shared_decoder):
num_classes = 10
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model_config = panoptic_deeplab_cfg.PanopticDeeplab(
num_classes=num_classes,
input_size=input_size,
backbone=backbones.Backbone(type=backbone_type),
decoder=decoders.Decoder(type=decoder_type),
semantic_head=panoptic_deeplab_cfg.SemanticHead(
level=level,
num_convs=1,
kernel_size=5,
prediction_kernel_size=1,
low_level=low_level),
instance_head=panoptic_deeplab_cfg.InstanceHead(
level=level,
num_convs=1,
kernel_size=5,
prediction_kernel_size=1,
low_level=low_level),
shared_decoder=shared_decoder)
l2_regularizer = tf.keras.regularizers.l2(5e-5)
_ = factory.build_panoptic_deeplab(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for Panoptic Deeplab heads."""
from typing import List, Union, Optional, Mapping, Tuple
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import fusion_layers
from official.vision.beta.ops import spatial_transform_ops
class PanopticDeeplabHead(tf.keras.layers.Layer):
"""Creates a panoptic deeplab head."""
def __init__(
self,
level: Union[int, str],
num_convs: int = 2,
num_filters: int = 256,
kernel_size: int = 3,
use_depthwise_convolution: bool = False,
upsample_factor: int = 1,
low_level: Union[List[int], Tuple[int]] = (3, 2),
low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a panoptic deeplab head.
Args:
level: An `int` or `str`, level to use to build head.
num_convs: An `int` number of stacked convolution before the last
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
kernel_size: An `int` number to specify the kernel size of the
stacked convolutions before the last prediction layer.
use_depthwise_convolution: A bool to specify if use depthwise separable
convolutions.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
low_level: A list of `int` backbone levels to be fused with the decoder
output, ordered from deeper to shallower levels.
low_level_num_filters: A list of `int` numbers of filters used to project
each low level feature before fusing it with higher level features.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super(PanopticDeeplabHead, self).__init__(**kwargs)
self._config_dict = {
'level': level,
'num_convs': num_convs,
'num_filters': num_filters,
'kernel_size': kernel_size,
'use_depthwise_convolution': use_depthwise_convolution,
'upsample_factor': upsample_factor,
'low_level': low_level,
'low_level_num_filters': low_level_num_filters,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the head."""
kernel_size = self._config_dict['kernel_size']
use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
conv_op = tf.keras.layers.Conv2D
conv_kwargs = {
'kernel_size': kernel_size if not use_depthwise_convolution else 1,
'padding': 'same',
'use_bias': False,
'kernel_initializer': random_initializer,
'kernel_regularizer': self._config_dict['kernel_regularizer'],
}
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
bn_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
self._panoptic_deeplab_fusion = fusion_layers.PanopticDeepLabFusion(
level=self._config_dict['level'],
low_level=self._config_dict['low_level'],
num_projection_filters=self._config_dict['low_level_num_filters'],
num_output_filters=self._config_dict['num_filters'],
activation=self._config_dict['activation'],
use_sync_bn=self._config_dict['use_sync_bn'],
norm_momentum=self._config_dict['norm_momentum'],
norm_epsilon=self._config_dict['norm_epsilon'],
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'])
# Stacked convolutions layers.
self._convs = []
self._norms = []
for i in range(self._config_dict['num_convs']):
if use_depthwise_convolution:
self._convs.append(
tf.keras.layers.DepthwiseConv2D(
name='panoptic_deeplab_head_depthwise_conv_{}'.format(i),
kernel_size=3,
padding='same',
use_bias=False,
depthwise_initializer=random_initializer,
depthwise_regularizer=self._config_dict['kernel_regularizer'],
depth_multiplier=1))
norm_name = 'panoptic_deeplab_head_depthwise_norm_{}'.format(i)
self._norms.append(bn_op(name=norm_name, **bn_kwargs))
conv_name = 'panoptic_deeplab_head_conv_{}'.format(i)
self._convs.append(
conv_op(
name=conv_name,
filters=self._config_dict['num_filters'],
**conv_kwargs))
norm_name = 'panoptic_deeplab_head_norm_{}'.format(i)
self._norms.append(bn_op(name=norm_name, **bn_kwargs))
super().build(input_shape)
def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
Union[tf.Tensor, Mapping[str, tf.Tensor]]],
training=None):
"""Forward pass of the head.
It supports either a tuple of two tensors or a tuple of two dictionaries. The
first element holds backbone endpoints and the second holds decoder endpoints.
When the inputs are tensors, they come from a single level of feature maps.
When the inputs are dictionaries, they contain multiple levels of feature
maps, keyed by the level of each feature map.
Args:
inputs: A tuple of 2 feature map tensors of shape
[batch, height_l, width_l, channels] or 2 dictionaries of tensors:
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor` of the feature map tensors, whose shape is
[batch, height_l, width_l, channels].
Returns:
A `tf.Tensor` of the fused backbone and decoder features.
"""
if training is None:
training = tf.keras.backend.learning_phase()
x = self._panoptic_deeplab_fusion(inputs, training=training)
for conv, norm in zip(self._convs, self._norms):
x = conv(x)
x = norm(x, training=training)
x = self._activation(x)
if self._config_dict['upsample_factor'] > 1:
x = spatial_transform_ops.nearest_upsampling(
x, scale=self._config_dict['upsample_factor'])
return x
def get_config(self):
base_config = super().get_config()
return dict(list(base_config.items()) + list(self._config_dict.items()))
@classmethod
def from_config(cls, config):
return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class SemanticHead(PanopticDeeplabHead):
"""Creates a semantic head."""
def __init__(
self,
num_classes: int,
level: Union[int, str],
num_convs: int = 2,
num_filters: int = 256,
kernel_size: int = 3,
prediction_kernel_size: int = 3,
use_depthwise_convolution: bool = False,
upsample_factor: int = 1,
low_level: Union[List[int], Tuple[int]] = (3, 2),
low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a instance center head.
Args:
num_classes: An `int` number of mask classification categories. The number
of classes does not include background class.
level: An `int` or `str`, level to use to build head.
num_convs: An `int` number of stacked convolution before the last
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
kernel_size: An `int` number to specify the kernel size of the
stacked convolutions before the last prediction layer.
prediction_kernel_size: An `int` number to specify the kernel size of the
prediction layer.
use_depthwise_convolution: A bool to specify if use depthwise separable
convolutions.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
low_level: A list of `int` backbone levels to be fused with the decoder
output, ordered from deeper to shallower levels.
low_level_num_filters: A list of `int` numbers of filters used to project
each low level feature before fusing it with higher level features.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super(SemanticHead, self).__init__(
level=level,
num_convs=num_convs,
num_filters=num_filters,
use_depthwise_convolution=use_depthwise_convolution,
kernel_size=kernel_size,
upsample_factor=upsample_factor,
low_level=low_level,
low_level_num_filters=low_level_num_filters,
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
**kwargs)
self._config_dict.update({
'num_classes': num_classes,
'prediction_kernel_size': prediction_kernel_size})
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the semantic head."""
super(SemanticHead, self).build(input_shape)
self._classifier = tf.keras.layers.Conv2D(
name='semantic_output',
filters=self._config_dict['num_classes'],
kernel_size=self._config_dict['prediction_kernel_size'],
padding='same',
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'])
def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
Union[tf.Tensor, Mapping[str, tf.Tensor]]],
training=None):
"""Forward pass of the head."""
if training is None:
training = tf.keras.backend.learning_phase()
x = super(SemanticHead, self).call(inputs, training=training)
outputs = self._classifier(x)
return outputs
@tf.keras.utils.register_keras_serializable(package='Vision')
class InstanceHead(PanopticDeeplabHead):
"""Creates a instance head."""
def __init__(
self,
level: Union[int, str],
num_convs: int = 2,
num_filters: int = 256,
kernel_size: int = 3,
prediction_kernel_size: int = 3,
use_depthwise_convolution: bool = False,
upsample_factor: int = 1,
low_level: Union[List[int], Tuple[int]] = (3, 2),
low_level_num_filters: Union[List[int], Tuple[int]] = (64, 32),
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a instance center head.
Args:
level: An `int` or `str`, level to use to build head.
num_convs: An `int` number of stacked convolution before the last
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
kernel_size: An `int` number to specify the kernel size of the
stacked convolutions before the last prediction layer.
prediction_kernel_size: An `int` number to specify the kernel size of the
prediction layer.
use_depthwise_convolution: A bool to specify if use depthwise separable
convolutions.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
low_level: A list of `int` backbone levels to be fused with the decoder
output, ordered from deeper to shallower levels.
low_level_num_filters: A list of `int` numbers of filters used to project
each low level feature before fusing it with higher level features.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super(InstanceHead, self).__init__(
level=level,
num_convs=num_convs,
num_filters=num_filters,
use_depthwise_convolution=use_depthwise_convolution,
kernel_size=kernel_size,
upsample_factor=upsample_factor,
low_level=low_level,
low_level_num_filters=low_level_num_filters,
activation=activation,
use_sync_bn=use_sync_bn,
norm_momentum=norm_momentum,
norm_epsilon=norm_epsilon,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
**kwargs)
self._config_dict.update({
'prediction_kernel_size': prediction_kernel_size})
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the instance head."""
super(InstanceHead, self).build(input_shape)
self._instance_center_prediction_conv = tf.keras.layers.Conv2D(
name='instance_center_prediction',
filters=1,
kernel_size=self._config_dict['prediction_kernel_size'],
padding='same',
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'])
self._instance_center_regression_conv = tf.keras.layers.Conv2D(
name='instance_center_regression',
filters=2,
kernel_size=self._config_dict['prediction_kernel_size'],
padding='same',
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'])
def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
Union[tf.Tensor, Mapping[str, tf.Tensor]]],
training=None):
"""Forward pass of the head."""
if training is None:
training = tf.keras.backend.learning_phase()
x = super(InstanceHead, self).call(inputs, training=training)
instance_center_prediction = self._instance_center_prediction_conv(x)
instance_center_regression = self._instance_center_regression_conv(x)
outputs = {
'instance_center_prediction': instance_center_prediction,
'instance_center_regression': instance_center_regression
}
return outputs
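A short sketch of how these heads consume (backbone, decoder) feature dictionaries keyed by level; the shapes below are illustrative assumptions and mirror the test that follows:
# Illustrative multilevel features; shapes and levels are assumptions.
backbone_features = {'2': tf.random.uniform([2, 256, 256, 16]),
                     '3': tf.random.uniform([2, 128, 128, 16])}
decoder_features = {'3': tf.random.uniform([2, 128, 128, 64])}
semantic_head = SemanticHead(num_classes=10, level=3, low_level=(3, 2),
                             low_level_num_filters=(64, 32))
logits = semantic_head((backbone_features, decoder_features))
# logits: [2, 256, 256, 10], fused down to the resolution of low_level[-1].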
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for panoptic_deeplab_heads.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
class PanopticDeeplabHeadsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(2, (2,), (48,)),
(3, (2,), (48,)),
(2, (2,), (48,)),
(2, (2,), (48,)),
(3, (2,), (48,)),
(3, (2,), (48,)),
(4, (4, 3), (64, 32)),
(4, (3, 2), (64, 32)))
def test_forward(self, level, low_level, low_level_num_filters):
backbone_features = {
'3': np.random.rand(2, 128, 128, 16),
'4': np.random.rand(2, 64, 64, 16),
'5': np.random.rand(2, 32, 32, 16),
}
decoder_features = {
'3': np.random.rand(2, 128, 128, 64),
'4': np.random.rand(2, 64, 64, 64),
'5': np.random.rand(2, 32, 32, 64),
'6': np.random.rand(2, 16, 16, 64),
}
backbone_features['2'] = np.random.rand(2, 256, 256, 16)
decoder_features['2'] = np.random.rand(2, 256, 256, 64)
num_classes = 10
semantic_head = panoptic_deeplab_heads.SemanticHead(
num_classes=num_classes,
level=level,
low_level=low_level,
low_level_num_filters=low_level_num_filters)
instance_head = panoptic_deeplab_heads.InstanceHead(
level=level,
low_level=low_level,
low_level_num_filters=low_level_num_filters)
semantic_outputs = semantic_head((backbone_features, decoder_features))
instance_outputs = instance_head((backbone_features, decoder_features))
if str(level) in decoder_features:
h, w = decoder_features[str(low_level[-1])].shape[1:3]
self.assertAllEqual(
semantic_outputs.numpy().shape,
[2, h, w, num_classes])
self.assertAllEqual(
instance_outputs['instance_center_prediction'].numpy().shape,
[2, h, w, 1])
self.assertAllEqual(
instance_outputs['instance_center_regression'].numpy().shape,
[2, h, w, 2])
def test_serialize_deserialize(self):
semantic_head = panoptic_deeplab_heads.SemanticHead(num_classes=2, level=3)
instance_head = panoptic_deeplab_heads.InstanceHead(level=3)
semantic_head_config = semantic_head.get_config()
instance_head_config = instance_head.get_config()
new_semantic_head = panoptic_deeplab_heads.SemanticHead.from_config(
semantic_head_config)
new_instance_head = panoptic_deeplab_heads.InstanceHead.from_config(
instance_head_config)
self.assertAllEqual(semantic_head.get_config(),
new_semantic_head.get_config())
self.assertAllEqual(instance_head.get_config(),
new_instance_head.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for neural networks."""
from typing import Any, Callable, Dict, List, Mapping, Optional, Union
import tensorflow as tf
from official.modeling import tf_utils
# Type annotations.
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]
class PanopticDeepLabFusion(tf.keras.layers.Layer):
"""Creates a Panoptic DeepLab feature Fusion layer.
This implements the feature fusion introduced in the paper:
Cheng et al. Panoptic-DeepLab
(https://arxiv.org/pdf/1911.10194.pdf)
"""
def __init__(
self,
level: int,
low_level: List[int] = [3, 2],
num_projection_filters: List[int] = [64, 32],
num_output_filters: int = 256,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
interpolation: str = 'bilinear',
**kwargs):
"""Initializes panoptic FPN feature fusion layer.
Args:
level: An `int` level at which the decoder was appled at.
low_level: A list of `int` of minimum level to use in feature fusion.
num_filters: An `int` number of filters in conv2d layers.
activation: A `str` name of the activation function.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
interpolation: A `str` interpolation method for upsampling. Defaults to
`bilinear`.
**kwargs: Additional keyword arguments to be passed.
The output of `call` is a `float` `tf.Tensor` of shape
[batch_size, feature_height, feature_width, feature_channels].
"""
super(PanopticDeepLabFusion, self).__init__(**kwargs)
self._config_dict = {
'level': level,
'low_level': low_level,
'num_projection_filters': num_projection_filters,
'num_output_filters': num_output_filters,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
'interpolation': interpolation
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._channel_axis = -1
else:
self._channel_axis = 1
self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: List[tf.TensorShape]):
conv_op = tf.keras.layers.Conv2D
conv_kwargs = {
'padding': 'same',
'use_bias': False,
'kernel_initializer': tf.initializers.VarianceScaling(),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
}
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
bn_kwargs = {
'axis': self._channel_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
self._projection_convs = []
self._projection_norms = []
self._fusion_convs = []
self._fusion_norms = []
for i in range(len(self._config_dict['low_level'])):
self._projection_convs.append(
conv_op(
filters=self._config_dict['num_projection_filters'][i],
kernel_size=1,
**conv_kwargs))
self._fusion_convs.append(
conv_op(
filters=self._config_dict['num_output_filters'],
kernel_size=5,
**conv_kwargs))
self._projection_norms.append(bn_op(**bn_kwargs))
self._fusion_norms.append(bn_op(**bn_kwargs))
def call(self, inputs, training=None):
if training is None:
training = tf.keras.backend.learning_phase()
backbone_output = inputs[0]
decoder_output = inputs[1][str(self._config_dict['level'])]
x = decoder_output
for i in range(len(self._config_dict['low_level'])):
feature = backbone_output[str(self._config_dict['low_level'][i])]
feature = self._projection_convs[i](feature)
feature = self._projection_norms[i](feature, training=training)
feature = self._activation(feature)
shape = tf.shape(feature)
x = tf.image.resize(
x, size=[shape[1], shape[2]],
method=self._config_dict['interpolation'])
x = tf.concat([x, feature], axis=self._channel_axis)
x = self._fusion_convs[i](x)
x = self._fusion_norms[i](x, training=training)
x = self._activation(x)
return x
def get_config(self) -> Mapping[str, Any]:
return self._config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
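A minimal sketch of the fusion layer's expected inputs, a (backbone endpoints, decoder endpoints) tuple of dicts keyed by level; all shapes here are assumptions:
fusion = PanopticDeepLabFusion(
    level=3, low_level=[3, 2],
    num_projection_filters=[64, 32], num_output_filters=256)
backbone_features = {'2': tf.random.uniform([1, 64, 64, 16]),
                     '3': tf.random.uniform([1, 32, 32, 16])}
decoder_features = {'3': tf.random.uniform([1, 32, 32, 128])}
fused = fusion((backbone_features, decoder_features))
# fused: [1, 64, 64, 256]; the decoder output is progressively upsampled and
# merged with each low level, ending at the resolution of low_level[-1].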
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains functions to post-process Panoptic-DeepLab results
Note that the postprocessing class and the supporting functions are branched
from https://github.com/google-research/deeplab2/blob/main/model/post_processor/panoptic_deeplab.py
"""
import functools
from typing import List, Tuple, Dict, Text
import tensorflow as tf
def _add_zero_padding(input_tensor: tf.Tensor, kernel_size: int,
rank: int) -> tf.Tensor:
"""Adds zero-padding to the input_tensor."""
pad_total = kernel_size - 1
pad_begin = pad_total // 2
pad_end = pad_total - pad_begin
if rank == 3:
return tf.pad(
input_tensor,
paddings=[[pad_begin, pad_end], [pad_begin, pad_end], [0, 0]])
else:
return tf.pad(
input_tensor,
paddings=[[0, 0], [pad_begin, pad_end], [pad_begin, pad_end], [0, 0]])
def _get_semantic_predictions(semantic_logits: tf.Tensor) -> tf.Tensor:
"""Computes the semantic classes from the predictions.
Args:
semantic_logits: A tf.tensor of shape [batch, height, width, classes].
Returns:
A tf.Tensor containing the semantic class prediction of shape
[batch, height, width].
"""
return tf.argmax(semantic_logits, axis=-1, output_type=tf.int32)
def _get_instance_centers_from_heatmap(
center_heatmap: tf.Tensor,
center_threshold: float,
nms_kernel_size: int,
keep_k_centers: int) -> Tuple[tf.Tensor, tf.Tensor]:
"""Computes a list of instance centers.
Args:
center_heatmap: A tf.Tensor of shape [height, width, 1].
center_threshold: A float setting the threshold for the center heatmap.
nms_kernel_size: An integer specifying the nms kernel size.
keep_k_centers: An integer specifying the number of centers to keep (K).
Non-positive values will keep all centers.
Returns:
A tuple of
- tf.Tensor of shape [N, 2] containing N center coordinates (after
non-maximum suppression) in (y, x) order.
- tf.Tensor of shape [height, width] containing the center heatmap after
non-maximum suppression.
"""
# Threshold center map.
center_heatmap = tf.where(
tf.greater(center_heatmap, center_threshold), center_heatmap, 0.0)
# Non-maximum suppression.
padded_map = _add_zero_padding(center_heatmap, nms_kernel_size, rank=3)
pooled_center_heatmap = tf.keras.backend.pool2d(
tf.expand_dims(padded_map, 0),
pool_size=(nms_kernel_size, nms_kernel_size),
strides=(1, 1),
padding='valid',
pool_mode='max')
center_heatmap = tf.where(
tf.equal(pooled_center_heatmap, center_heatmap), center_heatmap, 0.0)
center_heatmap = tf.squeeze(center_heatmap, axis=[0, 3])
# `centers` is of shape (N, 2) with (y, x) order of the second dimension.
centers = tf.where(tf.greater(center_heatmap, 0.0))
if keep_k_centers > 0 and tf.shape(centers)[0] > keep_k_centers:
topk_scores, _ = tf.math.top_k(
tf.reshape(center_heatmap, [-1]), keep_k_centers, sorted=False)
centers = tf.where(tf.greater(center_heatmap, topk_scores[-1]))
return centers, center_heatmap
def _find_closest_center_per_pixel(centers: tf.Tensor,
center_offsets: tf.Tensor) -> tf.Tensor:
"""Assigns all pixels to their closest center.
Args:
centers: A tf.Tensor of shape [N, 2] containing N centers with coordinate
order (y, x).
center_offsets: A tf.Tensor of shape [height, width, 2].
Returns:
A tf.Tensor of shape [height, width] containing the index of the closest
center, per pixel.
"""
height = tf.shape(center_offsets)[0]
width = tf.shape(center_offsets)[1]
x_coord, y_coord = tf.meshgrid(tf.range(width), tf.range(height))
coord = tf.stack([y_coord, x_coord], axis=-1)
center_per_pixel = tf.cast(coord, tf.float32) + center_offsets
# centers: [N, 2] -> [N, 1, 2].
# center_per_pixel: [H, W, 2] -> [1, H*W, 2].
centers = tf.cast(tf.expand_dims(centers, 1), tf.float32)
center_per_pixel = tf.reshape(center_per_pixel, [height*width, 2])
center_per_pixel = tf.expand_dims(center_per_pixel, 0)
# distances: [N, H*W].
distances = tf.norm(centers - center_per_pixel, axis=-1)
return tf.reshape(tf.argmin(distances, axis=0), [height, width])
def _get_instances_from_heatmap_and_offset(
semantic_segmentation: tf.Tensor, center_heatmap: tf.Tensor,
center_offsets: tf.Tensor, center_threshold: float,
thing_class_ids: tf.Tensor, nms_kernel_size: int,
keep_k_centers: int) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
"""Computes the instance assignment per pixel.
Args:
semantic_segmentation: A tf.Tensor containing the semantic labels of shape
[height, width].
center_heatmap: A tf.Tensor of shape [height, width, 1].
center_offsets: A tf.Tensor of shape [height, width, 2].
center_threshold: A float setting the threshold for the center heatmap.
thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
nms_kernel_size: An integer specifying the nms kernel size.
keep_k_centers: An integer specifying the number of centers to keep.
Negative values will keep all centers.
Returns:
A tuple of:
- tf.Tensor containing the instance segmentation (filtered with the `thing`
segmentation from the semantic segmentation output) with shape
[height, width].
- tf.Tensor containing the processed centermap with shape [height, width].
- tf.Tensor containing instance scores (where higher "score" is a reasonable
signal of a higher confidence detection.) Will be of shape [height, width]
with the score for a pixel being the score of the instance it belongs to.
The scores will be zero for pixels in background/"stuff" regions.
"""
thing_segmentation = tf.zeros_like(semantic_segmentation)
for thing_id in thing_class_ids:
thing_segmentation = tf.where(tf.equal(semantic_segmentation, thing_id),
1,
thing_segmentation)
centers, processed_center_heatmap = _get_instance_centers_from_heatmap(
center_heatmap, center_threshold, nms_kernel_size, keep_k_centers)
if tf.shape(centers)[0] == 0:
return (tf.zeros_like(semantic_segmentation), processed_center_heatmap,
tf.zeros_like(processed_center_heatmap))
instance_center_index = _find_closest_center_per_pixel(
centers, center_offsets)
# Instance IDs should start with 1. So we use the index into the centers, but
# shifted by 1.
instance_segmentation = tf.cast(instance_center_index, tf.int32) + 1
# The value of the heatmap at an instance's center is used as the score
# for that instance.
instance_scores = tf.gather_nd(processed_center_heatmap, centers)
tf.debugging.assert_shapes([
(centers, ('N', 2)),
(instance_scores, ('N',)),
])
# This will map the instance scores back to the image space: where each pixel
# has a value equal to the score of its instance.
flat_center_index = tf.reshape(instance_center_index, [-1])
instance_score_map = tf.gather(instance_scores, flat_center_index)
instance_score_map = tf.reshape(instance_score_map,
tf.shape(instance_segmentation))
instance_score_map *= tf.cast(thing_segmentation, tf.float32)
return (thing_segmentation * instance_segmentation, processed_center_heatmap,
instance_score_map)
@tf.function
def _get_panoptic_predictions(
semantic_logits: tf.Tensor, center_heatmap: tf.Tensor,
center_offsets: tf.Tensor, center_threshold: float,
thing_class_ids: tf.Tensor, label_divisor: int, stuff_area_limit: int,
void_label: int, nms_kernel_size: int, keep_k_centers: int
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
"""Computes the semantic class and instance ID per pixel.
Args:
semantic_logits: A tf.Tensor of shape [batch, height, width, classes].
center_heatmap: A tf.Tensor of shape [batch, height, width, 1].
center_offsets: A tf.Tensor of shape [batch, height, width, 2].
center_threshold: A float setting the threshold for the center heatmap.
thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
label_divisor: An integer specifying the label divisor of the dataset.
stuff_area_limit: An integer specifying the number of pixels that stuff
regions need to have at least. The stuff region will be included in the
panoptic prediction, only if its area is larger than the limit; otherwise,
it will be re-assigned as void_label.
void_label: An integer specifying the void label.
nms_kernel_size: An integer specifying the nms kernel size.
keep_k_centers: An integer specifying the number of centers to keep.
Negative values will keep all centers.
Returns:
A tuple of:
- the panoptic prediction as tf.Tensor with shape [batch, height, width].
- the semantic prediction as tf.Tensor with shape [batch, height, width].
- the instance prediction as tf.Tensor with shape [batch, height, width].
- the centermap prediction as tf.Tensor with shape [batch, height, width].
- the instance score maps as tf.Tensor with shape [batch, height, width].
"""
semantic_prediction = _get_semantic_predictions(semantic_logits)
batch_size = tf.shape(semantic_logits)[0]
instance_map_lists = tf.TensorArray(
tf.int32, size=batch_size, dynamic_size=False)
center_map_lists = tf.TensorArray(
tf.float32, size=batch_size, dynamic_size=False)
instance_score_map_lists = tf.TensorArray(
tf.float32, size=batch_size, dynamic_size=False)
for i in tf.range(batch_size):
(instance_map, center_map,
instance_score_map) = _get_instances_from_heatmap_and_offset(
semantic_prediction[i, ...], center_heatmap[i, ...],
center_offsets[i, ...], center_threshold, thing_class_ids,
nms_kernel_size, keep_k_centers)
instance_map_lists = instance_map_lists.write(i, instance_map)
center_map_lists = center_map_lists.write(i, center_map)
instance_score_map_lists = instance_score_map_lists.write(
i, instance_score_map)
# This does not work with unknown shapes.
instance_maps = instance_map_lists.stack()
center_maps = center_map_lists.stack()
instance_score_maps = instance_score_map_lists.stack()
panoptic_prediction = _merge_semantic_and_instance_maps(
semantic_prediction, instance_maps, thing_class_ids, label_divisor,
stuff_area_limit, void_label)
return (panoptic_prediction, semantic_prediction, instance_maps, center_maps,
instance_score_maps)
@tf.function
def _merge_semantic_and_instance_maps(
semantic_prediction: tf.Tensor,
instance_maps: tf.Tensor,
thing_class_ids: tf.Tensor,
label_divisor: int,
stuff_area_limit: int,
void_label: int) -> tf.Tensor:
"""Merges semantic and instance maps to obtain panoptic segmentation.
This function merges the semantic segmentation and class-agnostic
instance segmentation to form the panoptic segmentation. In particular,
the class label of each instance mask is inferred from the majority
votes from the corresponding pixels in the semantic segmentation. This
operation was first proposed in the DeeperLab paper and adopted by
Panoptic-DeepLab.
- DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
- Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
Note that this function only supports batch = 1 for simplicity. Additionally,
this function has a slightly different implementation from the provided
TensorFlow implementation `merge_ops` but with a similar performance. This
function is mainly used as a backup solution when you could not successfully
compile the provided TensorFlow implementation. To reproduce our results,
please use the provided TensorFlow implementation (i.e., not use this
function, but the `merge_ops.merge_semantic_and_instance_maps`).
Args:
semantic_prediction: A tf.Tensor of shape [batch, height, width].
instance_maps: A tf.Tensor of shape [batch, height, width].
thing_class_ids: A tf.Tensor of shape [N] containing N thing indices.
label_divisor: An integer specifying the label divisor of the dataset.
stuff_area_limit: An integer specifying the number of pixels that stuff
regions need to have at least. The stuff region will be included in the
panoptic prediction, only if its area is larger than the limit; otherwise,
it will be re-assigned as void_label.
void_label: An integer specifying the void label.
Returns:
panoptic_prediction: A tf.Tensor with shape [batch, height, width].
"""
prediction_shape = semantic_prediction.get_shape().as_list()
# This implementation only supports batch size of 1. Since model construction
# might lose batch size information (and leave it to None), override it here.
prediction_shape[0] = 1
semantic_prediction = tf.ensure_shape(semantic_prediction, prediction_shape)
instance_maps = tf.ensure_shape(instance_maps, prediction_shape)
# Default panoptic_prediction to have semantic label = void_label.
panoptic_prediction = tf.ones_like(
semantic_prediction) * void_label * label_divisor
# Start to paste predicted `thing` regions to panoptic_prediction.
# Infer `thing` segmentation regions from semantic prediction.
semantic_thing_segmentation = tf.zeros_like(semantic_prediction,
dtype=tf.bool)
for thing_class in thing_class_ids:
semantic_thing_segmentation = tf.math.logical_or(
semantic_thing_segmentation,
semantic_prediction == thing_class)
# Keep track of how many instances for each semantic label.
num_instance_per_semantic_label = tf.TensorArray(
tf.int32, size=0, dynamic_size=True, clear_after_read=False)
instance_ids, _ = tf.unique(tf.reshape(instance_maps, [-1]))
for instance_id in instance_ids:
# Instance ID 0 is reserved for crowd region.
if instance_id == 0:
continue
thing_mask = tf.math.logical_and(instance_maps == instance_id,
semantic_thing_segmentation)
if tf.reduce_sum(tf.cast(thing_mask, tf.int32)) == 0:
continue
semantic_bin_counts = tf.math.bincount(
tf.boolean_mask(semantic_prediction, thing_mask))
semantic_majority = tf.cast(
tf.math.argmax(semantic_bin_counts), tf.int32)
while num_instance_per_semantic_label.size() <= semantic_majority:
num_instance_per_semantic_label = num_instance_per_semantic_label.write(
num_instance_per_semantic_label.size(), 0)
new_instance_id = (
num_instance_per_semantic_label.read(semantic_majority) + 1)
num_instance_per_semantic_label = num_instance_per_semantic_label.write(
semantic_majority, new_instance_id)
panoptic_prediction = tf.where(
thing_mask,
tf.ones_like(panoptic_prediction) * semantic_majority * label_divisor
+ new_instance_id,
panoptic_prediction)
# Done with `num_instance_per_semantic_label` tensor array.
num_instance_per_semantic_label.close()
# Start to paste predicted `stuff` regions to panoptic prediction.
instance_stuff_regions = instance_maps == 0
semantic_ids, _ = tf.unique(tf.reshape(semantic_prediction, [-1]))
for semantic_id in semantic_ids:
if tf.reduce_sum(tf.cast(thing_class_ids == semantic_id, tf.int32)) > 0:
continue
# Check stuff area.
stuff_mask = tf.math.logical_and(semantic_prediction == semantic_id,
instance_stuff_regions)
stuff_area = tf.reduce_sum(tf.cast(stuff_mask, tf.int32))
if stuff_area >= stuff_area_limit:
panoptic_prediction = tf.where(
stuff_mask,
tf.ones_like(panoptic_prediction) * semantic_id * label_divisor,
panoptic_prediction)
return panoptic_prediction
class PostProcessor(tf.keras.layers.Layer):
"""This class contains code of a Panoptic-Deeplab post-processor."""
def __init__(
self,
center_score_threshold: float,
thing_class_ids: List[int],
label_divisor: int,
stuff_area_limit: int,
ignore_label: int,
nms_kernel: int,
keep_k_centers: int,
**kwargs):
"""Initializes a Panoptic-Deeplab post-processor.
Args:
center_score_threshold: A float setting the threshold for the center heatmap.
thing_class_ids: An integer list shape [N] containing N thing indices.
label_divisor: An integer specifying the label divisor of the dataset.
stuff_area_limit: An integer specifying the number of pixels that stuff
regions need to have at least. The stuff region will be included in the
panoptic prediction, only if its area is larger than the limit;
otherwise, it will be re-assigned as void_label.
ignore_label: An integer specifying the ignore label, used as the void label
in the panoptic output.
nms_kernel: An integer specifying the nms kernel size.
keep_k_centers: An integer specifying the number of centers to keep.
Negative values will keep all centers.
"""
super(PostProcessor, self).__init__(**kwargs)
self._config_dict = {
'center_score_threshold': center_score_threshold,
'thing_class_ids': thing_class_ids,
'label_divisor': label_divisor,
'stuff_area_limit': stuff_area_limit,
'ignore_label': ignore_label,
'nms_kernel': nms_kernel,
'keep_k_centers': keep_k_centers
}
self._post_processor = functools.partial(
_get_panoptic_predictions,
center_threshold=center_score_threshold,
thing_class_ids=tf.convert_to_tensor(thing_class_ids),
label_divisor=label_divisor,
stuff_area_limit=stuff_area_limit,
void_label=ignore_label,
nms_kernel_size=nms_kernel,
keep_k_centers=keep_k_centers)
def call(self, result_dict: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
"""Performs the post-processing given model predicted results.
Args:
result_dict: A dictionary of tf.Tensor containing model results. The dict
has to contain
- segmentation_outputs
- instance_center_prediction
- instance_center_regression
Returns:
The post-processed dict of tf.Tensor, containing the following keys:
- panoptic_outputs
- category_mask
- instance_mask
- instance_centers
- instance_scores
"""
processed_dict = {}
(processed_dict['panoptic_outputs'],
processed_dict['category_mask'],
processed_dict['instance_mask'],
processed_dict['instance_centers'],
processed_dict['instance_scores']
) = self._post_processor(
tf.nn.softmax(result_dict['segmentation_outputs'], axis=-1),
result_dict['instance_center_prediction'],
result_dict['instance_center_regression'])
return processed_dict
def get_config(self):
return self._config_dict
@classmethod
def from_config(cls, config):
return cls(**config)
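A hedged sketch of driving the post-processor with raw head outputs; every tensor and hyperparameter below is a placeholder, not a recommended setting:
post_processor = PostProcessor(
    center_score_threshold=0.1,
    thing_class_ids=[1, 2, 3],  # assumed 'thing' class ids
    label_divisor=256,
    stuff_area_limit=4096,
    ignore_label=0,
    nms_kernel=7,
    keep_k_centers=200)
results = post_processor({
    'segmentation_outputs': tf.random.uniform([1, 64, 64, 5]),
    'instance_center_prediction': tf.random.uniform([1, 64, 64, 1]),
    'instance_center_regression': tf.zeros([1, 64, 64, 2])})
# results holds 'panoptic_outputs', 'category_mask', 'instance_mask',
# 'instance_centers' and 'instance_scores', each of shape [1, 64, 64].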
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for panoptic_deeplab.py.
Note that the tests are branched from
https://raw.githubusercontent.com/google-research/deeplab2/main/model/post_processor/panoptic_deeplab_test.py
"""
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
class PostProcessingTest(tf.test.TestCase):
def test_py_func_merge_semantic_and_instance_maps_can_run(self):
batch = 1
height = 5
width = 5
semantic_prediction = tf.random.uniform((batch, height, width),
minval=0,
maxval=20,
dtype=tf.int32)
instance_maps = tf.random.uniform((batch, height, width),
minval=0,
maxval=3,
dtype=tf.int32)
thing_class_ids = tf.convert_to_tensor([1, 2, 3])
label_divisor = 256
stuff_area_limit = 3
void_label = 255
panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps(
semantic_prediction, instance_maps, thing_class_ids, label_divisor,
stuff_area_limit, void_label)
self.assertListEqual(semantic_prediction.get_shape().as_list(),
panoptic_prediction.get_shape().as_list())
def test_merge_semantic_and_instance_maps_with_a_simple_example(self):
semantic_prediction = tf.convert_to_tensor(
[[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 2, 2, 0],
[2, 2, 3, 3]]], dtype=tf.int32)
instance_maps = tf.convert_to_tensor(
[[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 1, 1, 0],
[2, 2, 3, 3]]], dtype=tf.int32)
thing_class_ids = tf.convert_to_tensor([2, 3])
label_divisor = 256
stuff_area_limit = 3
void_label = 255
# The expected_panoptic_prediction is computed as follows.
# For `thing` segmentation, instance 1, 2, and 3 are kept, but instance 3
# will have a new instance ID 1, since it is the first instance in its
# own semantic label.
# For `stuff` segmentation, class-0 region is kept, while class-1 region
# is re-labeled as `void_label * label_divisor` since its area is smaller
# than stuff_area_limit.
expected_panoptic_prediction = tf.convert_to_tensor(
[[[0, 0, 0, 0],
[0, void_label * label_divisor, void_label * label_divisor, 0],
[0, 2 * label_divisor + 1, 2 * label_divisor + 1, 0],
[2 * label_divisor + 2, 2 * label_divisor + 2, 3 * label_divisor + 1,
3 * label_divisor + 1]]], dtype=tf.int32)
panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps(
semantic_prediction, instance_maps, thing_class_ids, label_divisor,
stuff_area_limit, void_label)
np.testing.assert_equal(expected_panoptic_prediction.numpy(),
panoptic_prediction.numpy())
def test_gets_panoptic_predictions_with_score(self):
batch = 1
height = 5
width = 5
classes = 3
semantic_logits = tf.random.uniform((batch, 1, 1, classes))
semantic_logits = tf.tile(semantic_logits, (1, height, width, 1))
center_heatmap = tf.convert_to_tensor([
[1.0, 0.0, 0.0, 0.0, 0.0],
[0.8, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.1, 0.7],
[0.0, 0.0, 0.0, 0.0, 0.2],
], dtype=tf.float32)
center_heatmap = tf.expand_dims(center_heatmap, 0)
center_heatmap = tf.expand_dims(center_heatmap, 3)
center_offsets = tf.zeros((batch, height, width, 2))
center_threshold = 0.0
thing_class_ids = tf.range(classes) # No "stuff" classes.
label_divisor = 256
stuff_area_limit = 16
void_label = classes
nms_kernel_size = 3
keep_k_centers = 2
result = panoptic_deeplab_merge._get_panoptic_predictions(
semantic_logits, center_heatmap, center_offsets, center_threshold,
thing_class_ids, label_divisor, stuff_area_limit, void_label,
nms_kernel_size, keep_k_centers)
instance_maps = result[2].numpy()
instance_scores = result[4].numpy()
self.assertSequenceEqual(instance_maps.shape, (batch, height, width))
expected_instances = [[
[1, 1, 1, 1, 2],
[1, 1, 1, 2, 2],
[1, 1, 2, 2, 2],
[1, 2, 2, 2, 2],
[1, 2, 2, 2, 2],
]]
np.testing.assert_array_equal(instance_maps, expected_instances)
self.assertSequenceEqual(instance_scores.shape, (batch, height, width))
expected_instance_scores = [[
[1.0, 1.0, 1.0, 1.0, 0.7],
[1.0, 1.0, 1.0, 0.7, 0.7],
[1.0, 1.0, 0.7, 0.7, 0.7],
[1.0, 0.7, 0.7, 0.7, 0.7],
[1.0, 0.7, 0.7, 0.7, 0.7],
]]
np.testing.assert_array_almost_equal(instance_scores,
expected_instance_scores)
if __name__ == '__main__':
tf.test.main()
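# A minimal decoding sketch (illustrative only, not part of the library): the
# merged panoptic maps checked above can be split back into a category mask
# and a per-class instance mask using the same `label_divisor` convention as
# these tests.
def decode_panoptic_map_example(panoptic, label_divisor):
  """Splits a merged panoptic map into category and per-class instance ids."""
  category_mask = panoptic // label_divisor
  instance_mask = panoptic % label_divisor
  return category_mask, instance_mask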
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build Panoptic Deeplab model."""
from typing import Any, Mapping, Optional, Union
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
@tf.keras.utils.register_keras_serializable(package='Vision')
class PanopticDeeplabModel(tf.keras.Model):
"""Panoptic Deeplab model."""
def __init__(
self,
backbone: tf.keras.Model,
semantic_decoder: tf.keras.Model,
semantic_head: tf.keras.layers.Layer,
instance_head: tf.keras.layers.Layer,
instance_decoder: Optional[tf.keras.Model] = None,
post_processor: Optional[panoptic_deeplab_merge.PostProcessor] = None,
**kwargs):
"""
Args:
backbone: a backbone network.
semantic_decoder: a decoder network. E.g. FPN.
semantic_head: segmentation head.
instance_head: instance center head .
instance_decoder: Optional decoder network for instance predictions.
**kwargs: keyword arguments to be passed.
"""
super(PanopticDeeplabModel, self).__init__(**kwargs)
self._config_dict = {
'backbone': backbone,
'semantic_decoder': semantic_decoder,
'instance_decoder': instance_decoder,
'semantic_head': semantic_head,
'instance_head': instance_head,
'post_processor': post_processor
}
self.backbone = backbone
self.semantic_decoder = semantic_decoder
self.instance_decoder = instance_decoder
self.semantic_head = semantic_head
self.instance_head = instance_head
self.post_processor = post_processor
def call(self, inputs: tf.Tensor,
training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
if training is None:
training = tf.keras.backend.learning_phase()
backbone_features = self.backbone(inputs, training=training)
semantic_features = self.semantic_decoder(
backbone_features, training=training)
if self.instance_decoder is None:
instance_features = semantic_features
else:
instance_features = self.instance_decoder(
backbone_features, training=training)
segmentation_outputs = self.semantic_head(
(backbone_features, semantic_features),
training=training)
instance_outputs = self.instance_head(
(backbone_features, instance_features),
training=training)
outputs = {
'segmentation_outputs': segmentation_outputs,
'instance_center_prediction':
instance_outputs['instance_center_prediction'],
'instance_center_regression':
instance_outputs['instance_center_regression'],
}
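# During training the raw head outputs are returned so losses can be
# computed on them; at inference time they are merged into panoptic
# predictions by the (optional) post-processor below.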
if training:
return outputs
if self.post_processor is not None:
outputs = self.post_processor(outputs)
return outputs
@property
def checkpoint_items(
self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
"""Returns a dictionary of items to be additionally checkpointed."""
items = dict(
backbone=self.backbone,
semantic_decoder=self.semantic_decoder,
semantic_head=self.semantic_head,
instance_head=self.instance_head)
if self.instance_decoder is not None:
items.update(instance_decoder=self.instance_decoder)
return items
def get_config(self) -> Mapping[str, Any]:
return self._config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
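# Illustrative sketch (not part of this file) of one way `checkpoint_items`
# might be consumed: wrap the submodules in a `tf.train.Checkpoint` to restore
# pretrained weights from a hypothetical `ckpt_path`.
def restore_submodules_example(model: PanopticDeeplabModel, ckpt_path: str):
  """Restores submodule weights from a checkpoint via `checkpoint_items`."""
  ckpt = tf.train.Checkpoint(**model.checkpoint_items)
  ckpt.read(ckpt_path).expect_partial()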
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for Panoptic Deeplab network."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.vision.beta.modeling import backbones
from official.vision.beta.modeling.decoders import aspp
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):
@combinations.generate(
combinations.combine(
level=[2, 3, 4],
input_size=[256, 512],
low_level=[(4, 3), (3, 2)],
shared_decoder=[True, False],
training=[True, False]))
def test_panoptic_deeplab_network_creation(
self, input_size, level, low_level, shared_decoder, training):
"""Test for creation of a panoptic deep lab network."""
batch_size = 2 if training else 1
num_classes = 10
inputs = np.random.rand(batch_size, input_size, input_size, 3)
tf.keras.backend.set_image_data_format('channels_last')
backbone = backbones.ResNet(model_id=50)
semantic_decoder = aspp.ASPP(
level=level, dilation_rates=[6, 12, 18])
if shared_decoder:
instance_decoder = semantic_decoder
else:
instance_decoder = aspp.ASPP(
level=level, dilation_rates=[6, 12, 18])
semantic_head = panoptic_deeplab_heads.SemanticHead(
num_classes,
level=level,
low_level=low_level,
low_level_num_filters=(64, 32))
instance_head = panoptic_deeplab_heads.InstanceHead(
level=level,
low_level=low_level,
low_level_num_filters=(64, 32))
post_processor = panoptic_deeplab_merge.PostProcessor(
center_score_threshold=0.1,
thing_class_ids=[1, 2, 3, 4],
label_divisor=256,
stuff_area_limit=4096,
ignore_label=0,
nms_kernel=41,
keep_k_centers=41)
model = panoptic_deeplab_model.PanopticDeeplabModel(
backbone=backbone,
semantic_decoder=semantic_decoder,
instance_decoder=instance_decoder,
semantic_head=semantic_head,
instance_head=instance_head,
post_processor=post_processor)
outputs = model(inputs, training=training)
if training:
self.assertIn('segmentation_outputs', outputs)
self.assertIn('instance_center_prediction', outputs)
self.assertIn('instance_center_regression', outputs)
self.assertAllEqual(
[2, input_size // (2**low_level[-1]),
input_size // (2**low_level[-1]),
num_classes],
outputs['segmentation_outputs'].numpy().shape)
self.assertAllEqual(
[2, input_size // (2**low_level[-1]),
input_size // (2**low_level[-1]),
1],
outputs['instance_center_prediction'].numpy().shape)
self.assertAllEqual(
[2, input_size // (2**low_level[-1]),
input_size // (2**low_level[-1]),
2],
outputs['instance_center_regression'].numpy().shape)
else:
self.assertIn('panoptic_outputs', outputs)
self.assertIn('category_mask', outputs)
self.assertIn('instance_mask', outputs)
self.assertIn('instance_centers', outputs)
self.assertIn('instance_scores', outputs)
@combinations.generate(
combinations.combine(
level=[2, 3, 4],
low_level=[(4, 3), (3, 2)],
shared_decoder=[True, False]))
def test_serialize_deserialize(self, level, low_level, shared_decoder):
"""Validate the network can be serialized and deserialized."""
num_classes = 10
backbone = backbones.ResNet(model_id=50)
semantic_decoder = aspp.ASPP(
level=level, dilation_rates=[6, 12, 18])
if shared_decoder:
instance_decoder = semantic_decoder
else:
instance_decoder = aspp.ASPP(
level=level, dilation_rates=[6, 12, 18])
semantic_head = panoptic_deeplab_heads.SemanticHead(
num_classes,
level=level,
low_level=low_level,
low_level_num_filters=(64, 32))
instance_head = panoptic_deeplab_heads.InstanceHead(
level=level,
low_level=low_level,
low_level_num_filters=(64, 32))
post_processor = panoptic_deeplab_merge.PostProcessor(
center_score_threshold=0.1,
thing_class_ids=[1, 2, 3, 4],
label_divisor=256,
stuff_area_limit=4096,
ignore_label=0,
nms_kernel=41,
keep_k_centers=41)
model = panoptic_deeplab_model.PanopticDeeplabModel(
backbone=backbone,
semantic_decoder=semantic_decoder,
instance_decoder=instance_decoder,
semantic_head=semantic_head,
instance_head=instance_head,
post_processor=post_processor)
config = model.get_config()
new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config)
# Validate that the config can be forced to JSON.
_ = new_model.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(model.get_config(), new_model.get_config())
if __name__ == '__main__':
tf.test.main()