Merge branch 'master' of https://github.com/tensorflow/models

2412b118 · Gunho Park · f7783e7a · 6dbdb08c · 2412b118 · 2412b118
Commit 2412b118 authored Jul 02, 2022 by Gunho Park
20 changed files
--- a/docs/nlp/fine_tune_bert.ipynb
+++ b/docs/nlp/fine_tune_bert.ipynb
--- a/official/core/__init__.py
+++ b/official/core/__init__.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 """Core is shared by both `nlp` and `vision`."""
+
 from official.core import actions
 from official.core import base_task
 from official.core import base_trainer
@@ -21,6 +22,7 @@ from official.core import exp_factory
 from official.core import export_base
 from official.core import input_reader
 from official.core import registry
+from official.core import savedmodel_checkpoint_manager
 from official.core import task_factory
 from official.core import train_lib
 from official.core import train_utils
--- a/official/core/savedmodel_checkpoint_manager.py
+++ b/official/core/savedmodel_checkpoint_manager.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Custom checkpoint manager that also exports saved models."""
+
+import os
+import re
+from typing import Callable, Mapping, Optional
+
+from absl import logging
+import tensorflow as tf
+
+
+def make_saved_modules_directory_name(checkpoint_name: str) -> str:
+  return f'{checkpoint_name}_saved_modules'
+
+
+class SavedModelCheckpointManager(tf.train.CheckpointManager):
+  """A CheckpointManager that also exports `SavedModel`s."""
+
+  def __init__(self,
+               checkpoint: tf.train.Checkpoint,
+               directory: str,
+               max_to_keep: int,
+               modules_to_export: Optional[Mapping[str, tf.Module]] = None,
+               keep_checkpoint_every_n_hours: Optional[int] = None,
+               checkpoint_name: str = 'ckpt',
+               step_counter: Optional[tf.Variable] = None,
+               checkpoint_interval: Optional[int] = None,
+               init_fn: Optional[Callable[[], None]] = None):
+    """See base class."""
+    super().__init__(
+        checkpoint=checkpoint,
+        directory=directory,
+        max_to_keep=max_to_keep,
+        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
+        checkpoint_name=checkpoint_name,
+        step_counter=step_counter,
+        checkpoint_interval=checkpoint_interval,
+        init_fn=init_fn)
+    self._modules_to_export = modules_to_export
+
+  def save(self,
+           checkpoint_number=None,
+           check_interval: bool = True,
+           options: Optional[tf.train.CheckpointOptions] = None):
+    """See base class."""
+    checkpoint_path = super().save(
+        checkpoint_number=checkpoint_number,
+        check_interval=check_interval,
+        options=options)
+    if not checkpoint_path:  # Nothing got written.
+      return
+    if not self._modules_to_export:  # No modules to export.
+      logging.info('Skip saving SavedModel due to empty modules_to_export.')
+      return checkpoint_path
+
+    # Save the models for the checkpoint that just got written.
+    saved_modules_directory = make_saved_modules_directory_name(checkpoint_path)
+    for model_name, model in self._modules_to_export.items():
+      tf.saved_model.save(
+          obj=model,
+          export_dir=os.path.join(saved_modules_directory, model_name))
+
+    # `checkpoint_path` ends in `-[\d]+`.  We want to glob for all existing
+    # checkpoints, and we use the .index file for that.
+    checkpoint_glob = re.sub(r'\d+$', '*.index', checkpoint_path)
+    existing_checkpoint_files = tf.io.gfile.glob(checkpoint_glob)
+
+    saved_modules_directories_to_keep = [
+        make_saved_modules_directory_name(os.path.splitext(ckpt_index)[0])
+        for ckpt_index in existing_checkpoint_files
+    ]
+    saved_modules_glob = re.sub(r'\d+_saved_modules$', '*_saved_modules',
+                                saved_modules_directory)
+
+    for existing_saved_modules_dir in tf.io.gfile.glob(saved_modules_glob):
+      if (existing_saved_modules_dir not in saved_modules_directories_to_keep
+          and tf.io.gfile.isdir(existing_saved_modules_dir)):
+        tf.io.gfile.rmtree(existing_saved_modules_dir)
+
+    return checkpoint_path
--- a/official/core/savedmodel_checkpoint_manager_test.py
+++ b/official/core/savedmodel_checkpoint_manager_test.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Iterable
+
+import tensorflow as tf
+
+from official.core import savedmodel_checkpoint_manager
+
+
+def _models_exist(checkpoint_path: str, models: Iterable[str]) -> bool:
+  for model_name in models:
+    if not tf.io.gfile.isdir(
+        os.path.join(
+            savedmodel_checkpoint_manager.make_saved_modules_directory_name(
+                checkpoint_path), model_name)):
+      return False
+  return True
+
+
+class CheckpointManagerTest(tf.test.TestCase):
+
+  def testSimpleTest(self):
+    models = {
+        "model_1":
+            tf.keras.Sequential(
+                layers=[tf.keras.layers.Dense(8, input_shape=(16,))]),
+        "model_2":
+            tf.keras.Sequential(
+                layers=[tf.keras.layers.Dense(16, input_shape=(32,))]),
+    }
+    checkpoint = tf.train.Checkpoint()
+    manager = savedmodel_checkpoint_manager.SavedModelCheckpointManager(
+        checkpoint=checkpoint,
+        directory=self.get_temp_dir(),
+        max_to_keep=1,
+        modules_to_export=models)
+
+    first_path = manager.save()
+    second_path = manager.save()
+
+    self.assertTrue(_models_exist(second_path, models.keys()))
+    self.assertFalse(_models_exist(first_path, models.keys()))
+
+
+if __name__ == "__main__":
+  tf.test.main()
--- a/official/core/train_lib.py
+++ b/official/core/train_lib.py
@@ -32,6 +32,226 @@ from official.core import train_utils
 maybe_create_best_ckpt_exporter = train_utils.maybe_create_best_ckpt_exporter


+class OrbitExperimentRunner:
+  """Runs experiment with Orbit training loop.
+
+  The default experiment runner for model garden experiments. User can
+  customize the experiment pipeline by subclassing this class and replacing
+  components or functions.
+
+  For example, an experiment runner with customized checkpoint manager:
+
+  ```python
+  class MyExpRunnerWithExporter(AbstractExperimentRunner):
+    def _maybe_build_checkpoint_manager(sefl):
+      return MyCheckpointManager(*args)
+
+  # In user code
+  MyExpRunnerWithExporter(**needed_kwargs).run(mode)
+  ```
+
+  Similar override can be done to other components.
+  """
+
+  def __init__(
+      self,
+      distribution_strategy: tf.distribute.Strategy,
+      task: base_task.Task,
+      mode: str,
+      params: config_definitions.ExperimentConfig,
+      model_dir: str,
+      run_post_eval: bool = False,
+      save_summary: bool = True,
+      train_actions: Optional[List[orbit.Action]] = None,
+      eval_actions: Optional[List[orbit.Action]] = None,
+      trainer: Optional[base_trainer.Trainer] = None,
+      controller_cls=orbit.Controller
+  ):
+    """Constructor.
+
+    Args:
+      distribution_strategy: A distribution strategy.
+      task: A Task instance.
+      mode: A 'str', specifying the mode. Can be 'train', 'eval',
+        'train_and_eval' or 'continuous_eval'.
+      params: ExperimentConfig instance.
+      model_dir: A 'str', a path to store model checkpoints and summaries.
+      run_post_eval: Whether to run post eval once after training, metrics logs
+        are returned.
+      save_summary: Whether to save train and validation summary.
+      train_actions: Optional list of Orbit train actions.
+      eval_actions: Optional list of Orbit eval actions.
+      trainer: the base_trainer.Trainer instance. It should be created within
+        the strategy.scope().
+      controller_cls: The controller class to manage the train and eval process.
+        Must be a orbit.Controller subclass.
+    """
+    self.strategy = distribution_strategy or tf.distribute.get_strategy()
+    self._params = params
+    self._model_dir = model_dir
+    self._mode = mode
+    self._run_post_eval = run_post_eval
+
+    self._trainer = trainer or self._build_trainer(
+        task,
+        train='train' in mode,
+        evaluate=('eval' in mode) or run_post_eval)
+    assert self.trainer is not None
+    self._checkpoint_manager = self._maybe_build_checkpoint_manager()
+    self._controller = self._build_controller(
+        trainer=self.trainer if 'train' in mode else None,
+        evaluator=self.trainer,
+        save_summary=save_summary,
+        train_actions=train_actions,
+        eval_actions=eval_actions,
+        controller_cls=controller_cls)
+
+  @property
+  def params(self) -> config_definitions.ExperimentConfig:
+    return self._params
+
+  @property
+  def model_dir(self) -> str:
+    return self._model_dir
+
+  @property
+  def trainer(self) -> base_trainer.Trainer:
+    return self._trainer
+
+  @property
+  def checkpoint_manager(self) -> tf.train.CheckpointManager:
+    return self._checkpoint_manager
+
+  @property
+  def controller(self) -> orbit.Controller:
+    return self._controller
+
+  def _build_trainer(self, task: base_task.Task, train: bool,
+                     evaluate: bool) -> base_trainer.Trainer:
+    """Create trainer."""
+    with self.strategy.scope():
+      trainer = train_utils.create_trainer(
+          self.params,
+          task,
+          train=train,
+          evaluate=evaluate,
+          checkpoint_exporter=self._build_best_checkpoint_exporter())
+    return trainer
+
+  def _build_best_checkpoint_exporter(self):
+    return maybe_create_best_ckpt_exporter(self.params, self.model_dir)
+
+  def _maybe_build_checkpoint_manager(
+      self) -> Optional[tf.train.CheckpointManager]:
+    """Maybe create a CheckpointManager."""
+    assert self.trainer is not None
+    if self.trainer.checkpoint:
+      if self.model_dir is None:
+        raise ValueError('model_dir must be specified, but got None')
+      checkpoint_manager = tf.train.CheckpointManager(
+          self.trainer.checkpoint,
+          directory=self.model_dir,
+          max_to_keep=self.params.trainer.max_to_keep,
+          step_counter=self.trainer.global_step,
+          checkpoint_interval=self.params.trainer.checkpoint_interval,
+          init_fn=self.trainer.initialize)
+    else:
+      checkpoint_manager = None
+    return checkpoint_manager
+
+  def _build_controller(self,
+                        trainer,
+                        evaluator,
+                        save_summary: bool = True,
+                        train_actions: Optional[List[orbit.Action]] = None,
+                        eval_actions: Optional[List[orbit.Action]] = None,
+                        controller_cls=orbit.Controller) -> orbit.Controller:
+    """Builds a Orbit controler."""
+    train_actions = [] if not train_actions else train_actions
+    if trainer:
+      train_actions += actions.get_train_actions(
+          self.params,
+          trainer,
+          self.model_dir,
+          checkpoint_manager=self.checkpoint_manager)
+
+    eval_actions = [] if not eval_actions else eval_actions
+    if evaluator:
+      eval_actions += actions.get_eval_actions(self.params, evaluator,
+                                               self.model_dir)
+
+    controller = controller_cls(
+        strategy=self.strategy,
+        trainer=trainer,
+        evaluator=evaluator,
+        global_step=self.trainer.global_step,
+        steps_per_loop=self.params.trainer.steps_per_loop,
+        checkpoint_manager=self.checkpoint_manager,
+        summary_dir=os.path.join(self.model_dir, 'train') if
+        (save_summary) else None,
+        eval_summary_dir=os.path.join(
+            self.model_dir, self.params.trainer.validation_summary_subdir) if
+        (save_summary) else None,
+        summary_interval=self.params.trainer.summary_interval if
+        (save_summary) else None,
+        train_actions=train_actions,
+        eval_actions=eval_actions)
+    return controller
+
+  def run(self) -> Tuple[tf.keras.Model, Mapping[str, Any]]:
+    """Run experiments by mode.
+
+    Returns:
+      A 2-tuple of (model, eval_logs).
+        model: `tf.keras.Model` instance.
+        eval_logs: returns eval metrics logs when run_post_eval is set to True,
+          otherwise, returns {}.
+    """
+    mode = self._mode
+    params = self.params
+    logging.info('Starts to execute mode: %s', mode)
+    with self.strategy.scope():
+      if mode == 'train' or mode == 'train_and_post_eval':
+        self.controller.train(steps=params.trainer.train_steps)
+      elif mode == 'train_and_eval':
+        self.controller.train_and_evaluate(
+            train_steps=params.trainer.train_steps,
+            eval_steps=params.trainer.validation_steps,
+            eval_interval=params.trainer.validation_interval)
+      elif mode == 'eval':
+        self.controller.evaluate(steps=params.trainer.validation_steps)
+      elif mode == 'continuous_eval':
+
+        def timeout_fn():
+          if self.trainer.global_step.numpy() >= params.trainer.train_steps:
+            return True
+          return False
+
+        self.controller.evaluate_continuously(
+            steps=params.trainer.validation_steps,
+            timeout=params.trainer.continuous_eval_timeout,
+            timeout_fn=timeout_fn)
+      else:
+        raise NotImplementedError('The mode is not implemented: %s' % mode)
+
+    num_params = train_utils.try_count_params(self.trainer.model)
+    if num_params is not None:
+      logging.info('Number of trainable params in model: %f Millions.',
+                   num_params / 10.**6)
+
+    flops = train_utils.try_count_flops(self.trainer.model)
+    if flops is not None:
+      logging.info('FLOPs (multi-adds) in model: %f Billions.',
+                   flops / 10.**9 / 2)
+
+    if self._run_post_eval or mode == 'train_and_post_eval':
+      with self.strategy.scope():
+        return self.trainer.model, self.controller.evaluate(
+            steps=params.trainer.validation_steps)
+    else:
+      return self.trainer.model, {}
+
+
 def run_experiment(
    distribution_strategy: tf.distribute.Strategy,
    task: base_task.Task,
@@ -70,91 +290,17 @@ def run_experiment(
      eval_logs: returns eval metrics logs when run_post_eval is set to True,
        otherwise, returns {}.
  """
-
-  with distribution_strategy.scope():
-    if not trainer:
-      trainer = train_utils.create_trainer(
-          params,
-          task,
-          train='train' in mode,
-          evaluate=('eval' in mode) or run_post_eval,
-          checkpoint_exporter=maybe_create_best_ckpt_exporter(
-              params, model_dir))
-
-  if trainer.checkpoint:
-    if model_dir is None:
-      raise ValueError('model_dir must be specified, but got None')
-    checkpoint_manager = tf.train.CheckpointManager(
-        trainer.checkpoint,
-        directory=model_dir,
-        max_to_keep=params.trainer.max_to_keep,
-        step_counter=trainer.global_step,
-        checkpoint_interval=params.trainer.checkpoint_interval,
-        init_fn=trainer.initialize)
-  else:
-    checkpoint_manager = None
-
-  train_actions = [] if not train_actions else train_actions
-  train_actions += actions.get_train_actions(
-      params, trainer, model_dir, checkpoint_manager=checkpoint_manager)
-
-  eval_actions = [] if not eval_actions else eval_actions
-  eval_actions += actions.get_eval_actions(params, trainer, model_dir)
-
-  controller = controller_cls(
-      strategy=distribution_strategy,
-      trainer=trainer if 'train' in mode else None,
-      evaluator=trainer,
-      global_step=trainer.global_step,
-      steps_per_loop=params.trainer.steps_per_loop,
-      checkpoint_manager=checkpoint_manager,
-      summary_dir=os.path.join(model_dir, 'train') if (save_summary) else None,
-      eval_summary_dir=os.path.join(model_dir,
-                                    params.trainer.validation_summary_subdir) if
-      (save_summary) else None,
-      summary_interval=params.trainer.summary_interval if
-      (save_summary) else None,
+  runner = OrbitExperimentRunner(
+      distribution_strategy=distribution_strategy,
+      task=task,
+      mode=mode,
+      params=params,
+      model_dir=model_dir,
+      run_post_eval=run_post_eval,
+      save_summary=save_summary,
      train_actions=train_actions,
-      eval_actions=eval_actions)
-
-  logging.info('Starts to execute mode: %s', mode)
-  with distribution_strategy.scope():
-    if mode == 'train' or mode == 'train_and_post_eval':
-      controller.train(steps=params.trainer.train_steps)
-    elif mode == 'train_and_eval':
-      controller.train_and_evaluate(
-          train_steps=params.trainer.train_steps,
-          eval_steps=params.trainer.validation_steps,
-          eval_interval=params.trainer.validation_interval)
-    elif mode == 'eval':
-      controller.evaluate(steps=params.trainer.validation_steps)
-    elif mode == 'continuous_eval':
-
-      def timeout_fn():
-        if trainer.global_step.numpy() >= params.trainer.train_steps:
-          return True
-        return False
-
-      controller.evaluate_continuously(
-          steps=params.trainer.validation_steps,
-          timeout=params.trainer.continuous_eval_timeout,
-          timeout_fn=timeout_fn)
-    else:
-      raise NotImplementedError('The mode is not implemented: %s' % mode)
-
-  num_params = train_utils.try_count_params(trainer.model)
-  if num_params is not None:
-    logging.info('Number of trainable params in model: %f Millions.',
-                 num_params / 10.**6)
-
-  flops = train_utils.try_count_flops(trainer.model)
-  if flops is not None:
-    logging.info('FLOPs (multi-adds) in model: %f Billions.',
-                 flops / 10.**9 / 2)
-
-  if run_post_eval or mode == 'train_and_post_eval':
-    with distribution_strategy.scope():
-      return trainer.model, controller.evaluate(
-          steps=params.trainer.validation_steps)
-  else:
-    return trainer.model, {}
+      eval_actions=eval_actions,
+      trainer=trainer,
+      controller_cls=controller_cls,
+  )
+  return runner.run()
--- a/official/core/train_lib_test.py
+++ b/official/core/train_lib_test.py
@@ -117,6 +117,61 @@ class TrainTest(tf.test.TestCase, parameterized.TestCase):
        model_dir=model_dir,
        run_post_eval=run_post_eval)

+  @combinations.generate(
+      combinations.combine(
+          distribution_strategy=[
+              strategy_combinations.default_strategy,
+              strategy_combinations.cloud_tpu_strategy,
+              strategy_combinations.one_device_strategy_gpu,
+          ],
+          flag_mode=['train', 'eval', 'train_and_eval'],
+          run_post_eval=[True, False]))
+  def test_end_to_end_class(self, distribution_strategy, flag_mode,
+                            run_post_eval):
+    model_dir = self.get_temp_dir()
+    flags_dict = dict(
+        experiment='mock',
+        mode=flag_mode,
+        model_dir=model_dir,
+        params_override=json.dumps(self._test_config))
+    with flagsaver.flagsaver(**flags_dict):
+      params = train_utils.parse_configuration(flags.FLAGS)
+      train_utils.serialize_config(params, model_dir)
+      with distribution_strategy.scope():
+        task = task_factory.get_task(params.task, logging_dir=model_dir)
+
+      _, logs = train_lib.OrbitExperimentRunner(
+          distribution_strategy=distribution_strategy,
+          task=task,
+          mode=flag_mode,
+          params=params,
+          model_dir=model_dir,
+          run_post_eval=run_post_eval).run()
+
+    if 'eval' in flag_mode:
+      self.assertTrue(
+          tf.io.gfile.exists(
+              os.path.join(model_dir,
+                           params.trainer.validation_summary_subdir)))
+    if run_post_eval:
+      self.assertNotEmpty(logs)
+    else:
+      self.assertEmpty(logs)
+    self.assertNotEmpty(
+        tf.io.gfile.glob(os.path.join(model_dir, 'params.yaml')))
+    if flag_mode == 'eval':
+      return
+    self.assertNotEmpty(
+        tf.io.gfile.glob(os.path.join(model_dir, 'checkpoint')))
+    # Tests continuous evaluation.
+    _, logs = train_lib.OrbitExperimentRunner(
+        distribution_strategy=distribution_strategy,
+        task=task,
+        mode='continuous_eval',
+        params=params,
+        model_dir=model_dir,
+        run_post_eval=run_post_eval).run()
+
  @combinations.generate(
      combinations.combine(
          distribution_strategy=[
@@ -148,12 +203,12 @@ class TrainTest(tf.test.TestCase, parameterized.TestCase):
        task.build_losses = build_losses

      with self.assertRaises(RuntimeError):
-        train_lib.run_experiment(
+        train_lib.OrbitExperimentRunner(
            distribution_strategy=distribution_strategy,
            task=task,
            mode=flag_mode,
            params=params,
-            model_dir=model_dir)
+            model_dir=model_dir).run()

  @combinations.generate(
      combinations.combine(
@@ -194,12 +249,12 @@ class TrainTest(tf.test.TestCase, parameterized.TestCase):

      task.build_losses = build_losses

-      model, _ = train_lib.run_experiment(
+      model, _ = train_lib.OrbitExperimentRunner(
          distribution_strategy=distribution_strategy,
          task=task,
          mode=flag_mode,
          params=params,
-          model_dir=model_dir)
+          model_dir=model_dir).run()
      after_weights = model.get_weights()
      for left, right in zip(before_weights, after_weights):
        self.assertAllEqual(left, right)

--- a/official/nlp/modeling/layers/gated_feedforward.py
+++ b/official/nlp/modeling/layers/gated_feedforward.py
@@ -19,6 +19,7 @@ import gin
 import tensorflow as tf

 from official.modeling import tf_utils
+from official.nlp.modeling.layers import util


 @tf.keras.utils.register_keras_serializable(package="Text")
@@ -57,9 +58,9 @@ class GatedFeedforward(tf.keras.layers.Layer):
  """

  def __init__(self,
-               intermediate_size,
-               intermediate_activation,
-               dropout,
+               inner_dim=768,
+               inner_activation=tf_utils.get_activation("gelu"),
+               dropout=0.0,
               use_gate=True,
               apply_output_layer_norm=True,
               num_blocks=1,
@@ -72,9 +73,12 @@ class GatedFeedforward(tf.keras.layers.Layer):
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
-    super(GatedFeedforward, self).__init__(**kwargs)
-    self._intermediate_size = intermediate_size
-    self._intermediate_activation = intermediate_activation
+    inner_dim = kwargs.pop("intermediate_size", inner_dim)
+    inner_activation = kwargs.pop("intermediate_activation", inner_activation)
+    util.filter_kwargs(kwargs)
+    super().__init__(**kwargs)
+    self._inner_dim = inner_dim
+    self._inner_activation = inner_activation
    self._dropout = dropout
    self._use_gate = use_gate
    self._num_blocks = num_blocks
@@ -103,7 +107,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint)
    self._intermediate_dense = []
-    self._intermediate_activation_layers = []
+    self._inner_activation_layers = []
    self._gate_dense = []
    self._output_dense = []
    self._output_dropout = []
@@ -118,7 +122,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
      self._intermediate_dense.append(
          tf.keras.layers.EinsumDense(
              "abc,cd->abd",
-              output_shape=(None, self._intermediate_size),
+              output_shape=(None, self._inner_dim),
              bias_axes="d",
              name="intermediate_%d" % i,
              kernel_initializer=tf_utils.clone_initializer(
@@ -126,14 +130,14 @@ class GatedFeedforward(tf.keras.layers.Layer):
              bias_initializer=tf_utils.clone_initializer(
                  self._bias_initializer),
              **common_kwargs))
-      self._intermediate_activation_layers.append(
+      self._inner_activation_layers.append(
          tf.keras.layers.Activation(
-              self._intermediate_activation, dtype=activation_policy))
+              self._inner_activation, dtype=activation_policy))
      if self._use_gate:
        self._gate_dense.append(
            tf.keras.layers.EinsumDense(
                "abc,cd->abd",
-                output_shape=(None, self._intermediate_size),
+                output_shape=(None, self._inner_dim),
                bias_axes="d",
                name="gate_%d" % i,
                kernel_initializer=tf_utils.clone_initializer(
@@ -164,10 +168,10 @@ class GatedFeedforward(tf.keras.layers.Layer):

  def get_config(self):
    config = {
-        "intermediate_size":
-            self._intermediate_size,
-        "intermediate_activation":
-            self._intermediate_activation,
+        "inner_dim":
+            self._inner_dim,
+        "inner_activation":
+            self._inner_activation,
        "dropout":
            self._dropout,
        "use_gate":
@@ -191,7 +195,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
        "bias_constraint":
            tf.keras.constraints.serialize(self._bias_constraint)
    }
-    base_config = super(GatedFeedforward, self).get_config()
+    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
@@ -199,7 +203,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
    for i in range(self._num_blocks):
      layer_input = layer_output
      intermediate_output = self._intermediate_dense[i](layer_input)
-      intermediate_output = self._intermediate_activation_layers[i](
+      intermediate_output = self._inner_activation_layers[i](
          intermediate_output)
      if self._use_gate:
        gated_linear = self._gate_dense[i](layer_input)

--- a/official/nlp/modeling/layers/gated_feedforward_test.py
+++ b/official/nlp/modeling/layers/gated_feedforward_test.py
@@ -44,8 +44,8 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
  def test_layer_creation(self, use_gate, num_blocks, dropout_position, dtype):
    tf.keras.mixed_precision.set_global_policy(dtype)
    kwargs = dict(
-        intermediate_size=128,
-        intermediate_activation="relu",
+        inner_dim=128,
+        inner_activation="relu",
        dropout=0.1,
        use_gate=use_gate,
        num_blocks=num_blocks,
@@ -76,8 +76,8 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
                            dtype):
    tf.keras.mixed_precision.set_global_policy(dtype)
    kwargs = dict(
-        intermediate_size=16,
-        intermediate_activation="relu",
+        inner_dim=16,
+        inner_activation="relu",
        dropout=0.1,
        use_gate=use_gate,
        num_blocks=num_blocks,
@@ -104,8 +104,8 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):

  def test_serialize_deserialize(self):
    kwargs = dict(
-        intermediate_size=16,
-        intermediate_activation="relu",
+        inner_dim=16,
+        inner_activation="relu",
        dropout=0.1,
        use_gate=False,
        num_blocks=4,

--- a/official/nlp/modeling/layers/rezero_transformer.py
+++ b/official/nlp/modeling/layers/rezero_transformer.py
@@ -76,7 +76,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
                                        attention_dropout_rate)
    dropout_rate = kwargs.pop("output_dropout", dropout_rate)
    inner_dim = kwargs.pop("intermediate_size", inner_dim)
-    inner_activation = kwargs.pop("inner_activation", inner_activation)
+    inner_activation = kwargs.pop("intermediate_activation", inner_activation)
    util.filter_kwargs(kwargs)
    super().__init__(**kwargs)


--- a/official/nlp/modeling/layers/tn_expand_condense_test.py
+++ b/official/nlp/modeling/layers/tn_expand_condense_test.py
@@ -19,8 +19,6 @@ import os
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
-# pylint: disable=g-direct-tensorflow-import
-from tensorflow.python.keras.testing_utils import layer_test
 from official.nlp.modeling.layers.tn_expand_condense import TNExpandCondense


@@ -45,13 +43,9 @@ class TNLayerTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters((768, 6), (1024, 2))
  def test_keras_layer(self, input_dim, proj_multiple):
-    self.skipTest('Disable the test for now since it imports '
-                  'keras.testing_utils, will reenable this test after we '
-                  'fix the b/184578869')
-    # TODO(scottzhu): Reenable after fix b/184578869
    data = np.random.normal(size=(100, input_dim))
    data = data.astype(np.float32)
-    layer_test(
+    tf.keras.__internal__.utils.layer_test(
        TNExpandCondense,
        kwargs={
            'proj_multiplier': proj_multiple,
@@ -64,9 +58,9 @@ class TNLayerTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters((768, 6), (1024, 2))
  def test_train(self, input_dim, proj_multiple):
+    tf.keras.utils.set_random_seed(0)
    data = np.random.randint(10, size=(100, input_dim))
    model = self._build_model(data, proj_multiple)
-    tf.keras.utils.set_random_seed(0)

    model.compile(
        optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

--- a/official/nlp/modeling/layers/transformer_encoder_block.py
+++ b/official/nlp/modeling/layers/transformer_encoder_block.py
@@ -75,7 +75,7 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
    E.g. let's say input dims are `[batch_size, seq_dim, input_last_dim]`.
    Scenario 1: If `output_last_dim` is not `None`, then the output dims of this
    module would be `[batch_size, seq_dim, output_last_dim]`. Note `key_dim` is
-    is overriden by `output_last_dim`.
+    overriden by `output_last_dim`.
    Scenario 2: If `output_last_dim` is `None` and `key_dim` is not `None`, then
    the output dims of this module would be `[batch_size, seq_dim, key_dim]`.
    Scenario 3: If the `output_last_dim` and `key_dim` are both `None`, the

--- a/official/projects/qat/vision/modeling/heads/dense_prediction_heads.py
+++ b/official/projects/qat/vision/modeling/heads/dense_prediction_heads.py
@@ -124,6 +124,7 @@ class RetinaNetHeadQuantized(tf.keras.layers.Layer):
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      num_params_per_anchor: int = 4,
+      share_classification_heads: bool = False,
      **kwargs):
    """Initializes a RetinaNet quantized head.

@@ -156,8 +157,13 @@ class RetinaNetHeadQuantized(tf.keras.layers.Layer):
        box. For example, `num_params_per_anchor` would be 4 for axis-aligned
        anchor boxes specified by their y-centers, x-centers, heights, and
        widths.
+      share_classification_heads: A `bool` that indicates whethere
+        sharing weights among the main and attribute classification heads. Not
+        used in the QAT model.
      **kwargs: Additional keyword arguments to be passed.
    """
+    del share_classification_heads
+
    super().__init__(**kwargs)
    self._config_dict = {
        'min_level': min_level,

--- a/official/projects/qat/vision/serving/export_module.py
+++ b/official/projects/qat/vision/serving/export_module.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Export modules for QAT model serving/inference."""
+
+import tensorflow as tf
+
+from official.projects.qat.vision.modeling import factory as qat_factory
+from official.vision.serving import image_classification
+from official.vision.serving import semantic_segmentation
+
+
+class ClassificationModule(image_classification.ClassificationModule):
+  """Classification Module."""
+
+  def _build_model(self):
+    model = super()._build_model()
+    input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] +
+                                            self._input_image_size + [3])
+    return qat_factory.build_qat_classification_model(
+        model, self.params.task.quantization, input_specs,
+        self.params.task.model)
+
+
+class SegmentationModule(semantic_segmentation.SegmentationModule):
+  """Segmentation Module."""
+
+  def _build_model(self):
+    model = super()._build_model()
+    input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] +
+                                            self._input_image_size + [3])
+    return qat_factory.build_qat_segmentation_model(
+        model, self.params.task.quantization, input_specs)
--- a/official/projects/qat/vision/serving/export_saved_model.py
+++ b/official/projects/qat/vision/serving/export_saved_model.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+r"""Vision models export binary for serving/inference.
+
+To export a trained checkpoint in saved_model format (shell script):
+
+EXPERIMENT_TYPE = XX
+CHECKPOINT_PATH = XX
+EXPORT_DIR_PATH = XX
+export_saved_model --experiment=${EXPERIMENT_TYPE} \
+                   --export_dir=${EXPORT_DIR_PATH}/ \
+                   --checkpoint_path=${CHECKPOINT_PATH} \
+                   --batch_size=2 \
+                   --input_image_size=224,224
+
+To serve (python):
+
+export_dir_path = XX
+input_type = XX
+input_images = XX
+imported = tf.saved_model.load(export_dir_path)
+model_fn = imported.signatures['serving_default']
+output = model_fn(input_images)
+"""
+
+from absl import app
+from absl import flags
+
+from official.core import exp_factory
+from official.modeling import hyperparams
+from official.projects.qat.vision import registry_imports  # pylint: disable=unused-import
+from official.projects.qat.vision.serving import export_module
+from official.vision import configs
+from official.vision.serving import export_saved_model_lib
+
+FLAGS = flags.FLAGS
+
+_EXPERIMENT = flags.DEFINE_string(
+    'experiment', None, 'experiment type, e.g. retinanet_resnetfpn_coco')
+_EXPORT_DIR = flags.DEFINE_string('export_dir', None, 'The export directory.')
+_CHECKPOINT_PATH = flags.DEFINE_string('checkpoint_path', None,
+                                       'Checkpoint path.')
+_CONFIG_FILE = flags.DEFINE_multi_string(
+    'config_file',
+    default=None,
+    help='YAML/JSON files which specifies overrides. The override order '
+    'follows the order of args. Note that each file '
+    'can be used as an override template to override the default parameters '
+    'specified in Python. If the same parameter is specified in both '
+    '`--config_file` and `--params_override`, `config_file` will be used '
+    'first, followed by params_override.')
+_PARAMS_OVERRIDE = flags.DEFINE_string(
+    'params_override', '',
+    'The JSON/YAML file or string which specifies the parameter to be overriden'
+    ' on top of `config_file` template.')
+_BATCH_SIZSE = flags.DEFINE_integer('batch_size', None, 'The batch size.')
+_IMAGE_TYPE = flags.DEFINE_string(
+    'input_type', 'image_tensor',
+    'One of `image_tensor`, `image_bytes`, `tf_example` and `tflite`.')
+_INPUT_IMAGE_SIZE = flags.DEFINE_string(
+    'input_image_size', '224,224',
+    'The comma-separated string of two integers representing the height,width '
+    'of the input to the model.')
+_EXPORT_CHECKPOINT_SUBDIR = flags.DEFINE_string(
+    'export_checkpoint_subdir', 'checkpoint',
+    'The subdirectory for checkpoints.')
+_EXPORT_SAVED_MODEL_SUBDIR = flags.DEFINE_string(
+    'export_saved_model_subdir', 'saved_model',
+    'The subdirectory for saved model.')
+_LOG_MODEL_FLOPS_AND_PARAMS = flags.DEFINE_bool(
+    'log_model_flops_and_params', False,
+    'If true, logs model flops and parameters.')
+_INPUT_NAME = flags.DEFINE_string(
+    'input_name', None,
+    'Input tensor name in signature def. Default at None which'
+    'produces input tensor name `inputs`.')
+
+
+def main(_):
+
+  params = exp_factory.get_exp_config(_EXPERIMENT.value)
+  for config_file in _CONFIG_FILE.value or []:
+    params = hyperparams.override_params_dict(
+        params, config_file, is_strict=True)
+  if _PARAMS_OVERRIDE.value:
+    params = hyperparams.override_params_dict(
+        params, _PARAMS_OVERRIDE.value, is_strict=True)
+
+  params.validate()
+  params.lock()
+
+  input_image_size = [int(x) for x in _INPUT_IMAGE_SIZE.value.split(',')]
+
+  if isinstance(params.task,
+                configs.image_classification.ImageClassificationTask):
+    export_module_cls = export_module.ClassificationModule
+  elif isinstance(params.task,
+                  configs.semantic_segmentation.SemanticSegmentationTask):
+    export_module_cls = export_module.SegmentationModule
+  else:
+    raise TypeError(f'Export module for {type(params.task)} is not supported.')
+
+  module = export_module_cls(
+      params=params,
+      batch_size=_BATCH_SIZSE.value,
+      input_image_size=input_image_size,
+      input_type=_IMAGE_TYPE.value,
+      num_channels=3)
+
+  export_saved_model_lib.export_inference_graph(
+      input_type=_IMAGE_TYPE.value,
+      batch_size=_BATCH_SIZSE.value,
+      input_image_size=input_image_size,
+      params=params,
+      checkpoint_path=_CHECKPOINT_PATH.value,
+      export_dir=_EXPORT_DIR.value,
+      export_checkpoint_subdir=_EXPORT_CHECKPOINT_SUBDIR.value,
+      export_saved_model_subdir=_EXPORT_SAVED_MODEL_SUBDIR.value,
+      export_module=module,
+      log_model_flops_and_params=_LOG_MODEL_FLOPS_AND_PARAMS.value,
+      input_name=_INPUT_NAME.value)
+
+
+if __name__ == '__main__':
+  app.run(main)
--- a/official/projects/qat/vision/serving/export_tflite.py
+++ b/official/projects/qat/vision/serving/export_tflite.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Binary to convert a saved model to TFLite model for the QAT model."""
+
+from absl import app
+
+from official.projects.qat.vision import registry_imports  # pylint: disable=unused-import
+from official.vision.serving import export_tflite
+
+if __name__ == '__main__':
+  app.run(export_tflite.main)
--- a/official/projects/volumetric_models/modeling/decoders/unet_3d_decoder.py
+++ b/official/projects/volumetric_models/modeling/decoders/unet_3d_decoder.py
@@ -95,7 +95,7 @@ class UNet3DDecoder(tf.keras.Model):
      channel_dim = 1

    # Build 3D UNet.
-    inputs = self._build_input_pyramid(input_specs, model_id)
+    inputs = self._build_input_pyramid(input_specs, model_id)  # pytype: disable=wrong-arg-types  # dynamic-method-lookup

    # Add levels with up-convolution or up-sampling.
    x = inputs[str(model_id)]

--- a/official/projects/yt8m/configs/yt8m.py
+++ b/official/projects/yt8m/configs/yt8m.py
@@ -88,6 +88,7 @@ class DataConfig(cfg.DataConfig):

 def yt8m(is_training):
  """YT8M dataset configs."""
+  # pylint: disable=unexpected-keyword-arg
  return DataConfig(
      num_frames=30,
      temporal_stride=1,
@@ -95,8 +96,10 @@ def yt8m(is_training):
      segment_size=5,
      is_training=is_training,
      split='train' if is_training else 'valid',
+      drop_remainder=is_training,  # pytype: disable=wrong-keyword-args
      num_examples=YT8M_TRAIN_EXAMPLES if is_training else YT8M_VAL_EXAMPLES,
      input_path=YT8M_TRAIN_PATH if is_training else YT8M_VAL_PATH)
+  # pylint: enable=unexpected-keyword-arg


 @dataclasses.dataclass

--- a/official/projects/yt8m/dataloaders/yt8m_input.py
+++ b/official/projects/yt8m/dataloaders/yt8m_input.py
@@ -22,7 +22,6 @@
  back into a range between min_quantized_value and max_quantized_value.
  link for details: https://research.google.com/youtube8m/download.html
 """
-
 from typing import Dict

 import tensorflow as tf
@@ -424,8 +423,9 @@ class PostBatchProcessor():
                                 [-1, self.num_classes])

    else:
-      video_matrix = tf.squeeze(video_matrix)
-      labels = tf.squeeze(labels)
+      # NOTE(b/237445211): Must provide axis argument to tf.squeeze.
+      video_matrix = tf.squeeze(video_matrix, axis=1)
+      labels = tf.squeeze(labels, axis=1)

    batched_tensors = {
        "video_matrix": video_matrix,
@@ -449,13 +449,15 @@ class TransformBatcher():
    self._global_batch_size = input_params.global_batch_size
    self._is_training = input_params.is_training
    self._include_video_id = input_params.include_video_id
+    self._drop_remainder = input_params.drop_remainder

  def batch_fn(self, dataset, input_context):
    """Add padding when segment_labels is true."""
    per_replica_batch_size = input_context.get_per_replica_batch_size(
        self._global_batch_size) if input_context else self._global_batch_size
    if not self._segment_labels:
-      dataset = dataset.batch(per_replica_batch_size, drop_remainder=True)
+      dataset = dataset.batch(
+          per_replica_batch_size, drop_remainder=self._drop_remainder)
    else:
      # add padding
      pad_shapes = {
@@ -476,6 +478,6 @@ class TransformBatcher():
      dataset = dataset.padded_batch(
          per_replica_batch_size,
          padded_shapes=pad_shapes,
-          drop_remainder=True,
+          drop_remainder=self._drop_remainder,
          padding_values=pad_values)
    return dataset
--- a/official/projects/yt8m/eval_utils/eval_util.py
+++ b/official/projects/yt8m/eval_utils/eval_util.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 """Provides functions to help with evaluating models."""
+import logging
 import numpy as np
 import tensorflow as tf
 from official.projects.yt8m.eval_utils import average_precision_calculator as ap_calculator
@@ -57,6 +58,9 @@ def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
+  if num_videos == 0:
+    logging.warning("Num_videos is 0, returning 0.0 aggregated_precision.")
+    return aggregated_precision
  for row in np.arange(num_videos):
    num_labels = int(np.sum(actuals[row]))
    top_indices = np.argpartition(predictions[row], -num_labels)[-num_labels:]

--- a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py
@@ -180,7 +180,7 @@ class PanopticDeeplabTask(cfg.TaskConfig):


 @exp_factory.register_config_factory('panoptic_deeplab_resnet_coco')
-def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
+def panoptic_deeplab_resnet_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Deeplab."""
  train_steps = 200000
  train_batch_size = 64
@@ -344,3 +344,327 @@ def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
          'task.validation_data.is_training != None'
      ])
  return config
+
+
+@exp_factory.register_config_factory('panoptic_deeplab_mobilenetv3_large_coco')
+def panoptic_deeplab_mobilenetv3_large_coco() -> cfg.ExperimentConfig:
+  """COCO panoptic segmentation with Panoptic Deeplab."""
+  train_steps = 200000
+  train_batch_size = 64
+  eval_batch_size = 1
+  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
+  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size
+
+  num_panoptic_categories = 201
+  num_thing_categories = 91
+  ignore_label = 0
+
+  is_thing = [False]
+  for idx in range(1, num_panoptic_categories):
+    is_thing.append(True if idx <= num_thing_categories else False)
+
+  input_size = [640, 640, 3]
+  output_stride = 16
+  aspp_dilation_rates = [6, 12, 18]
+  level = int(np.math.log2(output_stride))
+
+  config = cfg.ExperimentConfig(
+      runtime=cfg.RuntimeConfig(
+          mixed_precision_dtype='float32', enable_xla=True),
+      task=PanopticDeeplabTask(
+          init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/mobilenetv3_large/ckpt-156000',
+          init_checkpoint_modules=['backbone'],
+          model=PanopticDeeplab(
+              num_classes=num_panoptic_categories,
+              input_size=input_size,
+              backbone=backbones.Backbone(
+                  type='mobilenet', mobilenet=backbones.MobileNet(
+                      model_id='MobileNetV3Large',
+                      filter_size_scale=1.0,
+                      stochastic_depth_drop_rate=0.0,
+                      output_stride=output_stride)),
+              decoder=decoders.Decoder(
+                  type='aspp',
+                  aspp=decoders.ASPP(
+                      level=level,
+                      num_filters=256,
+                      pool_kernel_size=input_size[:2],
+                      dilation_rates=aspp_dilation_rates,
+                      use_depthwise_convolution=True,
+                      dropout_rate=0.1)),
+              semantic_head=SemanticHead(
+                  level=level,
+                  num_convs=1,
+                  num_filters=256,
+                  kernel_size=5,
+                  use_depthwise_convolution=True,
+                  upsample_factor=1,
+                  low_level=[3, 2],
+                  low_level_num_filters=[64, 32],
+                  fusion_num_output_filters=256,
+                  prediction_kernel_size=1),
+              instance_head=InstanceHead(
+                  level=level,
+                  num_convs=1,
+                  num_filters=32,
+                  kernel_size=5,
+                  use_depthwise_convolution=True,
+                  upsample_factor=1,
+                  low_level=[3, 2],
+                  low_level_num_filters=[32, 16],
+                  fusion_num_output_filters=128,
+                  prediction_kernel_size=1),
+              shared_decoder=False,
+              generate_panoptic_masks=True,
+              post_processor=PanopticDeeplabPostProcessor(
+                  output_size=input_size[:2],
+                  center_score_threshold=0.1,
+                  thing_class_ids=list(range(1, num_thing_categories)),
+                  label_divisor=256,
+                  stuff_area_limit=4096,
+                  ignore_label=ignore_label,
+                  nms_kernel=41,
+                  keep_k_centers=200,
+                  rescale_predictions=True)),
+          losses=Losses(
+              label_smoothing=0.0,
+              ignore_label=ignore_label,
+              l2_weight_decay=0.0,
+              top_k_percent_pixels=0.2,
+              segmentation_loss_weight=1.0,
+              center_heatmap_loss_weight=200,
+              center_offset_loss_weight=0.01),
+          train_data=DataConfig(
+              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
+              is_training=True,
+              global_batch_size=train_batch_size,
+              parser=Parser(
+                  aug_scale_min=0.5,
+                  aug_scale_max=2.0,
+                  aug_rand_hflip=True,
+                  aug_type=common.Augmentation(
+                      type='autoaug',
+                      autoaug=common.AutoAugment(
+                          augmentation_name='panoptic_deeplab_policy')),
+                  sigma=8.0,
+                  small_instance_area_threshold=4096,
+                  small_instance_weight=3.0)),
+          validation_data=DataConfig(
+              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              parser=Parser(
+                  resize_eval_groundtruth=False,
+                  groundtruth_padded_size=[640, 640],
+                  aug_scale_min=1.0,
+                  aug_scale_max=1.0,
+                  aug_rand_hflip=False,
+                  aug_type=None,
+                  sigma=8.0,
+                  small_instance_area_threshold=4096,
+                  small_instance_weight=3.0),
+              drop_remainder=False),
+          evaluation=Evaluation(
+              ignored_label=ignore_label,
+              max_instances_per_category=256,
+              offset=256*256*256,
+              is_thing=is_thing,
+              rescale_predictions=True,
+              report_per_class_pq=False,
+              report_per_class_iou=False,
+              report_train_mean_iou=False)),
+      trainer=cfg.TrainerConfig(
+          train_steps=train_steps,
+          validation_steps=validation_steps,
+          validation_interval=steps_per_epoch,
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=steps_per_epoch,
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'adam',
+              },
+              'learning_rate': {
+                  'type': 'polynomial',
+                  'polynomial': {
+                      'initial_learning_rate': 0.001,
+                      'decay_steps': train_steps,
+                      'end_learning_rate': 0.0,
+                      'power': 0.9
+                  }
+              },
+              'warmup': {
+                  'type': 'linear',
+                  'linear': {
+                      'warmup_steps': 2000,
+                      'warmup_learning_rate': 0
+                  }
+              }
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+          'task.validation_data.is_training != None'
+      ])
+  return config
+
+
+@exp_factory.register_config_factory('panoptic_deeplab_mobilenetv3_small_coco')
+def panoptic_deeplab_mobilenetv3_small_coco() -> cfg.ExperimentConfig:
+  """COCO panoptic segmentation with Panoptic Deeplab."""
+  train_steps = 200000
+  train_batch_size = 64
+  eval_batch_size = 1
+  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
+  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size
+
+  num_panoptic_categories = 201
+  num_thing_categories = 91
+  ignore_label = 0
+
+  is_thing = [False]
+  for idx in range(1, num_panoptic_categories):
+    is_thing.append(True if idx <= num_thing_categories else False)
+
+  input_size = [640, 640, 3]
+  output_stride = 16
+  aspp_dilation_rates = [6, 12, 18]
+  level = int(np.math.log2(output_stride))
+
+  config = cfg.ExperimentConfig(
+      runtime=cfg.RuntimeConfig(
+          mixed_precision_dtype='float32', enable_xla=True),
+      task=PanopticDeeplabTask(
+          init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/mobilenetv3_small/ckpt-312000',
+          init_checkpoint_modules=['backbone'],
+          model=PanopticDeeplab(
+              num_classes=num_panoptic_categories,
+              input_size=input_size,
+              backbone=backbones.Backbone(
+                  type='mobilenet', mobilenet=backbones.MobileNet(
+                      model_id='MobileNetV3Small',
+                      filter_size_scale=1.0,
+                      stochastic_depth_drop_rate=0.0,
+                      output_stride=output_stride)),
+              decoder=decoders.Decoder(
+                  type='aspp',
+                  aspp=decoders.ASPP(
+                      level=level,
+                      num_filters=256,
+                      pool_kernel_size=input_size[:2],
+                      dilation_rates=aspp_dilation_rates,
+                      use_depthwise_convolution=True,
+                      dropout_rate=0.1)),
+              semantic_head=SemanticHead(
+                  level=level,
+                  num_convs=1,
+                  num_filters=256,
+                  kernel_size=5,
+                  use_depthwise_convolution=True,
+                  upsample_factor=1,
+                  low_level=[3, 2],
+                  low_level_num_filters=[64, 32],
+                  fusion_num_output_filters=256,
+                  prediction_kernel_size=1),
+              instance_head=InstanceHead(
+                  level=level,
+                  num_convs=1,
+                  num_filters=32,
+                  kernel_size=5,
+                  use_depthwise_convolution=True,
+                  upsample_factor=1,
+                  low_level=[3, 2],
+                  low_level_num_filters=[32, 16],
+                  fusion_num_output_filters=128,
+                  prediction_kernel_size=1),
+              shared_decoder=False,
+              generate_panoptic_masks=True,
+              post_processor=PanopticDeeplabPostProcessor(
+                  output_size=input_size[:2],
+                  center_score_threshold=0.1,
+                  thing_class_ids=list(range(1, num_thing_categories)),
+                  label_divisor=256,
+                  stuff_area_limit=4096,
+                  ignore_label=ignore_label,
+                  nms_kernel=41,
+                  keep_k_centers=200,
+                  rescale_predictions=True)),
+          losses=Losses(
+              label_smoothing=0.0,
+              ignore_label=ignore_label,
+              l2_weight_decay=0.0,
+              top_k_percent_pixels=0.2,
+              segmentation_loss_weight=1.0,
+              center_heatmap_loss_weight=200,
+              center_offset_loss_weight=0.01),
+          train_data=DataConfig(
+              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
+              is_training=True,
+              global_batch_size=train_batch_size,
+              parser=Parser(
+                  aug_scale_min=0.5,
+                  aug_scale_max=2.0,
+                  aug_rand_hflip=True,
+                  aug_type=common.Augmentation(
+                      type='autoaug',
+                      autoaug=common.AutoAugment(
+                          augmentation_name='panoptic_deeplab_policy')),
+                  sigma=8.0,
+                  small_instance_area_threshold=4096,
+                  small_instance_weight=3.0)),
+          validation_data=DataConfig(
+              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              parser=Parser(
+                  resize_eval_groundtruth=False,
+                  groundtruth_padded_size=[640, 640],
+                  aug_scale_min=1.0,
+                  aug_scale_max=1.0,
+                  aug_rand_hflip=False,
+                  aug_type=None,
+                  sigma=8.0,
+                  small_instance_area_threshold=4096,
+                  small_instance_weight=3.0),
+              drop_remainder=False),
+          evaluation=Evaluation(
+              ignored_label=ignore_label,
+              max_instances_per_category=256,
+              offset=256*256*256,
+              is_thing=is_thing,
+              rescale_predictions=True,
+              report_per_class_pq=False,
+              report_per_class_iou=False,
+              report_train_mean_iou=False)),
+      trainer=cfg.TrainerConfig(
+          train_steps=train_steps,
+          validation_steps=validation_steps,
+          validation_interval=steps_per_epoch,
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=steps_per_epoch,
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'adam',
+              },
+              'learning_rate': {
+                  'type': 'polynomial',
+                  'polynomial': {
+                      'initial_learning_rate': 0.001,
+                      'decay_steps': train_steps,
+                      'end_learning_rate': 0.0,
+                      'power': 0.9
+                  }
+              },
+              'warmup': {
+                  'type': 'linear',
+                  'linear': {
+                      'warmup_steps': 2000,
+                      'warmup_learning_rate': 0
+                  }
+              }
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+          'task.validation_data.is_training != None'
+      ])
+  return config