Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
44f6d511
Unverified
Commit
44f6d511
authored
Apr 25, 2022
by
Srihari Humbarwadi
Committed by
GitHub
Apr 25, 2022
Browse files
Merge branch 'tensorflow:master' into panoptic-deeplab
parents
686a287d
8bc5a1a5
Changes
95
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
616 additions
and
284 deletions
+616
-284
official/nlp/optimization.py
official/nlp/optimization.py
+4
-126
official/projects/README.md
official/projects/README.md
+26
-6
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_600epochs.yaml
...riments/faster_rcnn/fastrcnn_resnet101_fpn_600epochs.yaml
+38
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_72epochs.yaml
...eriments/faster_rcnn/fastrcnn_resnet101_fpn_72epochs.yaml
+38
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_cascade_600epochs.yaml
...faster_rcnn/fastrcnn_resnet101_fpn_cascade_600epochs.yaml
+43
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_cascade_72epochs.yaml
.../faster_rcnn/fastrcnn_resnet101_fpn_cascade_72epochs.yaml
+43
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_600epochs.yaml
...ents/faster_rcnn/fastrcnn_resnet101_nasfpn_600epochs.yaml
+41
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_72epochs.yaml
...ments/faster_rcnn/fastrcnn_resnet101_nasfpn_72epochs.yaml
+41
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_cascade_600epochs.yaml
...ter_rcnn/fastrcnn_resnet101_nasfpn_cascade_600epochs.yaml
+45
-0
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_cascade_72epochs.yaml
...ster_rcnn/fastrcnn_resnet101_nasfpn_cascade_72epochs.yaml
+45
-0
official/projects/pruning/tasks/image_classification_test.py
official/projects/pruning/tasks/image_classification_test.py
+26
-0
official/projects/qat/vision/modeling/layers/nn_layers.py
official/projects/qat/vision/modeling/layers/nn_layers.py
+31
-12
official/projects/qat/vision/modeling/layers/nn_layers_test.py
...ial/projects/qat/vision/modeling/layers/nn_layers_test.py
+9
-9
official/projects/qat/vision/quantization/__init__.py
official/projects/qat/vision/quantization/__init__.py
+0
-3
official/projects/qat/vision/quantization/layer_transforms.py
...cial/projects/qat/vision/quantization/layer_transforms.py
+131
-0
official/projects/qat/vision/quantization/schemes.py
official/projects/qat/vision/quantization/schemes.py
+3
-108
official/projects/qat/vision/tasks/image_classification_test.py
...al/projects/qat/vision/tasks/image_classification_test.py
+18
-1
official/projects/qat/vision/tasks/retinanet_test.py
official/projects/qat/vision/tasks/retinanet_test.py
+20
-0
official/projects/teams/train.py
official/projects/teams/train.py
+10
-17
official/projects/vit/modeling/vit.py
official/projects/vit/modeling/vit.py
+4
-2
No files found.
official/nlp/optimization.py
View file @
44f6d511
...
@@ -12,14 +12,15 @@
...
@@ -12,14 +12,15 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""Functions and classes related to optimization (weight updates)."""
"""Legacy functions and classes related to optimization."""
import
re
from
absl
import
logging
from
absl
import
logging
import
gin
import
gin
import
tensorflow
as
tf
import
tensorflow
as
tf
import
tensorflow_addons.optimizers
as
tfa_optimizers
import
tensorflow_addons.optimizers
as
tfa_optimizers
from
official.modeling.optimization
import
legacy_adamw
AdamWeightDecay
=
legacy_adamw
.
AdamWeightDecay
class
WarmUp
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
class
WarmUp
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
...
@@ -105,126 +106,3 @@ def create_optimizer(init_lr,
...
@@ -105,126 +106,3 @@ def create_optimizer(init_lr,
raise
ValueError
(
'Unsupported optimizer type: '
,
optimizer_type
)
raise
ValueError
(
'Unsupported optimizer type: '
,
optimizer_type
)
return
optimizer
return
optimizer
class
AdamWeightDecay
(
tf
.
keras
.
optimizers
.
Adam
):
"""Adam enables L2 weight decay and clip_by_global_norm on gradients.
Just adding the square of the weights to the loss function is *not* the
correct way of using L2 regularization/weight decay with Adam, since that will
interact with the m and v parameters in strange ways.
Instead we want to decay the weights in a manner that doesn't interact with
the m/v parameters. This is equivalent to adding the square of the weights to
the loss with plain (non-momentum) SGD.
"""
def
__init__
(
self
,
learning_rate
=
0.001
,
beta_1
=
0.9
,
beta_2
=
0.999
,
epsilon
=
1e-7
,
amsgrad
=
False
,
weight_decay_rate
=
0.0
,
include_in_weight_decay
=
None
,
exclude_from_weight_decay
=
None
,
gradient_clip_norm
=
1.0
,
name
=
'AdamWeightDecay'
,
**
kwargs
):
super
(
AdamWeightDecay
,
self
).
__init__
(
learning_rate
,
beta_1
,
beta_2
,
epsilon
,
amsgrad
,
name
,
**
kwargs
)
self
.
weight_decay_rate
=
weight_decay_rate
self
.
gradient_clip_norm
=
gradient_clip_norm
self
.
_include_in_weight_decay
=
include_in_weight_decay
self
.
_exclude_from_weight_decay
=
exclude_from_weight_decay
logging
.
info
(
'gradient_clip_norm=%f'
,
gradient_clip_norm
)
@
classmethod
def
from_config
(
cls
,
config
):
"""Creates an optimizer from its config with WarmUp custom object."""
custom_objects
=
{
'WarmUp'
:
WarmUp
}
return
super
(
AdamWeightDecay
,
cls
).
from_config
(
config
,
custom_objects
=
custom_objects
)
def
_prepare_local
(
self
,
var_device
,
var_dtype
,
apply_state
):
super
(
AdamWeightDecay
,
self
).
_prepare_local
(
var_device
,
var_dtype
,
# pytype: disable=attribute-error # typed-keras
apply_state
)
apply_state
[(
var_device
,
var_dtype
)][
'weight_decay_rate'
]
=
tf
.
constant
(
self
.
weight_decay_rate
,
name
=
'adam_weight_decay_rate'
)
def
_decay_weights_op
(
self
,
var
,
learning_rate
,
apply_state
):
do_decay
=
self
.
_do_use_weight_decay
(
var
.
name
)
if
do_decay
:
return
var
.
assign_sub
(
learning_rate
*
var
*
apply_state
[(
var
.
device
,
var
.
dtype
.
base_dtype
)][
'weight_decay_rate'
],
use_locking
=
self
.
_use_locking
)
return
tf
.
no_op
()
def
apply_gradients
(
self
,
grads_and_vars
,
name
=
None
,
experimental_aggregate_gradients
=
True
):
grads
,
tvars
=
list
(
zip
(
*
grads_and_vars
))
if
experimental_aggregate_gradients
and
self
.
gradient_clip_norm
>
0.0
:
# when experimental_aggregate_gradients = False, apply_gradients() no
# longer implicitly allreduce gradients, users manually allreduce gradient
# and passed the allreduced grads_and_vars. For now, the
# clip_by_global_norm will be moved to before the explicit allreduce to
# keep the math the same as TF 1 and pre TF 2.2 implementation.
(
grads
,
_
)
=
tf
.
clip_by_global_norm
(
grads
,
clip_norm
=
self
.
gradient_clip_norm
)
return
super
(
AdamWeightDecay
,
self
).
apply_gradients
(
zip
(
grads
,
tvars
),
name
=
name
,
experimental_aggregate_gradients
=
experimental_aggregate_gradients
)
def
_get_lr
(
self
,
var_device
,
var_dtype
,
apply_state
):
"""Retrieves the learning rate with the given state."""
if
apply_state
is
None
:
return
self
.
_decayed_lr_t
[
var_dtype
],
{}
apply_state
=
apply_state
or
{}
coefficients
=
apply_state
.
get
((
var_device
,
var_dtype
))
if
coefficients
is
None
:
coefficients
=
self
.
_fallback_apply_state
(
var_device
,
var_dtype
)
apply_state
[(
var_device
,
var_dtype
)]
=
coefficients
return
coefficients
[
'lr_t'
],
dict
(
apply_state
=
apply_state
)
def
_resource_apply_dense
(
self
,
grad
,
var
,
apply_state
=
None
):
lr_t
,
kwargs
=
self
.
_get_lr
(
var
.
device
,
var
.
dtype
.
base_dtype
,
apply_state
)
decay
=
self
.
_decay_weights_op
(
var
,
lr_t
,
apply_state
)
with
tf
.
control_dependencies
([
decay
]):
return
super
(
AdamWeightDecay
,
self
).
_resource_apply_dense
(
grad
,
var
,
**
kwargs
)
# pytype: disable=attribute-error # typed-keras
def
_resource_apply_sparse
(
self
,
grad
,
var
,
indices
,
apply_state
=
None
):
lr_t
,
kwargs
=
self
.
_get_lr
(
var
.
device
,
var
.
dtype
.
base_dtype
,
apply_state
)
decay
=
self
.
_decay_weights_op
(
var
,
lr_t
,
apply_state
)
with
tf
.
control_dependencies
([
decay
]):
return
super
(
AdamWeightDecay
,
self
).
_resource_apply_sparse
(
grad
,
var
,
indices
,
**
kwargs
)
# pytype: disable=attribute-error # typed-keras
def
get_config
(
self
):
config
=
super
(
AdamWeightDecay
,
self
).
get_config
()
config
.
update
({
'weight_decay_rate'
:
self
.
weight_decay_rate
,
})
return
config
def
_do_use_weight_decay
(
self
,
param_name
):
"""Whether to use L2 weight decay for `param_name`."""
if
self
.
weight_decay_rate
==
0
:
return
False
if
self
.
_include_in_weight_decay
:
for
r
in
self
.
_include_in_weight_decay
:
if
re
.
search
(
r
,
param_name
)
is
not
None
:
return
True
if
self
.
_exclude_from_weight_decay
:
for
r
in
self
.
_exclude_from_weight_decay
:
if
re
.
search
(
r
,
param_name
)
is
not
None
:
return
False
return
True
official/projects/README.md
View file @
44f6d511
# TensorFlow Model Garden Modeling Projects
# TensorFlow Model Garden Modeling Projects
This directory contains projects using TensorFlow Model Garden Modeling
This directory contains projects using Modeling libraries of TensorFlow Model
libraries.
Garden. More details about each project can be found in the individual
project folders listed below.
## Projects
## Projects
*
[
NHNet
](
nhnet
)
:
*
[
AssembleNet
](
./assemblenet/README.md
)
[
Generating Representative Headlines for News Stories
](
https://arxiv.org/abs/2001.09386
)
*
[
BASNet
](
./basnet/README.md
)
by Gu et al, 2020
*
[
BigBird
](
./bigbird/README.md
)
*
[
DeepMAC Mask-RCNN
](
./deepmac_maskrcnn/README.md
)
*
[
DETR
](
./detr/README.md
)
*
[
Edge-TPU for Vision and NLP
](
./edgetpu/README.md
)
*
[
Language-agnostic BERT Sentence Embedding
](
./labse/README.md
)
*
[
Long-Document Transformer
](
./longformer/README.md
)
*
[
MobileBERT
](
./mobilebert/README.md
)
*
[
MoViNets
](
./movinet/README.md
)
*
[
News Headline Generation Model: NHNet
](
./nhnet/README.md
)
*
[
Training with Pruning
](
./pruning/README.md
)
*
[
QAT for Computer Vision
](
./qat/vision/README.md
)
*
[
Roformer Project
](
./roformer/README.md
)
*
[
Training ELECTRA Augmented with Multi-word Selection
](
./teams/README.md
)
*
[
NLP example project
](
./text_classification_example/README.md
)
*
[
TensorNetwork BERT
](
./tn_bert/README.md
)
*
[
Token Dropping for Efficient BERT Pretraining
](
./token_dropping/README.md
)
*
[
Spatiotemporal Contrastive Video Representation Learning
](
./video_ssl/README.md
)
*
[
Vision Transformer (ViT)
](
./vit/README.md
)
*
[
Data-Efficient Image Transformer (DEIT)
](
./vit/README.md
)
*
[
Volumetric Models
](
./volumetric_models/README.md
)
*
[
YouTube-8M Tensorflow Starter Code
](
./yt8m/README.md
)
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_600epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
fpn
detection_head
:
num_fcs
:
2
norm_activation
:
activation
:
swish
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
1062734
,
1090458
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
1108940
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_72epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
fpn
detection_head
:
num_fcs
:
2
norm_activation
:
activation
:
swish
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
88704
,
125664
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
133056
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_cascade_600epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
fpn
detection_head
:
cascade_class_ensemble
:
true
class_agnostic_bbox_pred
:
true
num_fcs
:
2
input_size
:
[
1280
,
1280
,
3
]
norm_activation
:
activation
:
swish
roi_sampler
:
cascade_iou_thresholds
:
[
0.7
,
0.8
]
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
1062734
,
1090458
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
1108940
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_fpn_cascade_72epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
fpn
detection_head
:
cascade_class_ensemble
:
true
class_agnostic_bbox_pred
:
true
num_fcs
:
2
input_size
:
[
1280
,
1280
,
3
]
norm_activation
:
activation
:
swish
roi_sampler
:
cascade_iou_thresholds
:
[
0.7
,
0.8
]
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
88704
,
125664
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
133056
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_600epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
nasfpn
detection_head
:
num_fcs
:
2
include_mask
:
false
max_level
:
7
min_level
:
3
norm_activation
:
activation
:
swish
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
1062734
,
1090458
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
1108940
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_72epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
nasfpn
detection_head
:
num_fcs
:
2
include_mask
:
false
max_level
:
7
min_level
:
3
norm_activation
:
activation
:
swish
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
88704
,
125664
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
133056
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_cascade_600epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
nasfpn
detection_head
:
cascade_class_ensemble
:
true
class_agnostic_bbox_pred
:
true
num_fcs
:
2
input_size
:
[
1280
,
1280
,
3
]
max_level
:
7
min_level
:
3
norm_activation
:
activation
:
swish
roi_sampler
:
cascade_iou_thresholds
:
[
0.7
,
0.8
]
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
1062734
,
1090458
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
1108940
official/projects/backbone_reuse/configs/experiments/faster_rcnn/fastrcnn_resnet101_nasfpn_cascade_72epochs.yaml
0 → 100644
View file @
44f6d511
task
:
# init_checkpoint: 'a_pretrained_backbone_checkpoint'
init_checkpoint_modules
:
backbone
freeze_backbone
:
true
model
:
backbone
:
resnet
:
model_id
:
101
replace_stem_max_pool
:
true
resnetd_shortcut
:
true
scale_stem
:
true
se_ratio
:
0.25
stem_type
:
v1
type
:
resnet
decoder
:
type
:
nasfpn
detection_head
:
cascade_class_ensemble
:
true
class_agnostic_bbox_pred
:
true
num_fcs
:
2
input_size
:
[
1280
,
1280
,
3
]
max_level
:
7
min_level
:
3
norm_activation
:
activation
:
swish
roi_sampler
:
cascade_iou_thresholds
:
[
0.7
,
0.8
]
train_data
:
global_batch_size
:
64
parser
:
aug_rand_hflip
:
true
aug_scale_max
:
2.0
aug_scale_min
:
0.1
trainer
:
optimizer_config
:
learning_rate
:
stepwise
:
boundaries
:
[
88704
,
125664
]
name
:
PiecewiseConstantDecay
offset
:
0
values
:
[
0.16
,
0.016
,
0.0016
]
type
:
stepwise
steps_per_loop
:
1848
summary_interval
:
1848
train_steps
:
133056
official/projects/pruning/tasks/image_classification_test.py
View file @
44f6d511
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
"""Tests for image classification task."""
"""Tests for image classification task."""
# pylint: disable=unused-import
# pylint: disable=unused-import
import
os
import
tempfile
import
tempfile
from
absl.testing
import
parameterized
from
absl.testing
import
parameterized
...
@@ -28,6 +29,7 @@ from official.core import actions
...
@@ -28,6 +29,7 @@ from official.core import actions
from
official.core
import
exp_factory
from
official.core
import
exp_factory
from
official.modeling
import
optimization
from
official.modeling
import
optimization
from
official.projects.pruning.tasks
import
image_classification
as
img_cls_task
from
official.projects.pruning.tasks
import
image_classification
as
img_cls_task
from
official.vision.dataloaders
import
tfexample_utils
class
ImageClassificationTaskTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
class
ImageClassificationTaskTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
...
@@ -88,11 +90,27 @@ class ImageClassificationTaskTest(tf.test.TestCase, parameterized.TestCase):
...
@@ -88,11 +90,27 @@ class ImageClassificationTaskTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertIn
(
'accuracy'
,
logs
)
self
.
assertIn
(
'accuracy'
,
logs
)
self
.
assertIn
(
'top_5_accuracy'
,
logs
)
self
.
assertIn
(
'top_5_accuracy'
,
logs
)
def
_create_test_tfrecord
(
self
,
test_tfrecord_file
,
num_samples
,
input_image_size
):
example
=
tf
.
train
.
Example
.
FromString
(
tfexample_utils
.
create_classification_example
(
image_height
=
input_image_size
[
0
],
image_width
=
input_image_size
[
1
]))
examples
=
[
example
]
*
num_samples
tfexample_utils
.
dump_to_tfrecord
(
record_file
=
test_tfrecord_file
,
tf_examples
=
examples
)
@
parameterized
.
parameters
((
'resnet_imagenet_pruning'
),
@
parameterized
.
parameters
((
'resnet_imagenet_pruning'
),
(
'mobilenet_imagenet_pruning'
))
(
'mobilenet_imagenet_pruning'
))
def
testTaskWithUnstructuredSparsity
(
self
,
config_name
):
def
testTaskWithUnstructuredSparsity
(
self
,
config_name
):
test_tfrecord_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'cls_test.tfrecord'
)
self
.
_create_test_tfrecord
(
test_tfrecord_file
=
test_tfrecord_file
,
num_samples
=
10
,
input_image_size
=
[
224
,
224
])
config
=
exp_factory
.
get_exp_config
(
config_name
)
config
=
exp_factory
.
get_exp_config
(
config_name
)
config
.
task
.
train_data
.
global_batch_size
=
2
config
.
task
.
train_data
.
global_batch_size
=
2
config
.
task
.
validation_data
.
input_path
=
test_tfrecord_file
config
.
task
.
train_data
.
input_path
=
test_tfrecord_file
task
=
img_cls_task
.
ImageClassificationTask
(
config
.
task
)
task
=
img_cls_task
.
ImageClassificationTask
(
config
.
task
)
model
=
task
.
build_model
()
model
=
task
.
build_model
()
...
@@ -129,8 +147,16 @@ class ImageClassificationTaskTest(tf.test.TestCase, parameterized.TestCase):
...
@@ -129,8 +147,16 @@ class ImageClassificationTaskTest(tf.test.TestCase, parameterized.TestCase):
@
parameterized
.
parameters
((
'resnet_imagenet_pruning'
),
@
parameterized
.
parameters
((
'resnet_imagenet_pruning'
),
(
'mobilenet_imagenet_pruning'
))
(
'mobilenet_imagenet_pruning'
))
def
testTaskWithStructuredSparsity
(
self
,
config_name
):
def
testTaskWithStructuredSparsity
(
self
,
config_name
):
test_tfrecord_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'cls_test.tfrecord'
)
self
.
_create_test_tfrecord
(
test_tfrecord_file
=
test_tfrecord_file
,
num_samples
=
10
,
input_image_size
=
[
224
,
224
])
config
=
exp_factory
.
get_exp_config
(
config_name
)
config
=
exp_factory
.
get_exp_config
(
config_name
)
config
.
task
.
train_data
.
global_batch_size
=
2
config
.
task
.
train_data
.
global_batch_size
=
2
config
.
task
.
validation_data
.
input_path
=
test_tfrecord_file
config
.
task
.
train_data
.
input_path
=
test_tfrecord_file
# Add structured sparsity
# Add structured sparsity
config
.
task
.
pruning
.
sparsity_m_by_n
=
(
2
,
4
)
config
.
task
.
pruning
.
sparsity_m_by_n
=
(
2
,
4
)
config
.
task
.
pruning
.
frequency
=
1
config
.
task
.
pruning
.
frequency
=
1
...
...
official/projects/qat/vision/modeling/layers/nn_layers.py
View file @
44f6d511
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
"""Contains common building blocks for neural networks."""
"""Contains common building blocks for neural networks."""
import
enum
from
typing
import
Callable
,
Dict
,
List
,
Mapping
,
Optional
,
Sequence
,
Tuple
,
Union
from
typing
import
Callable
,
Dict
,
List
,
Mapping
,
Optional
,
Sequence
,
Tuple
,
Union
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -31,6 +32,14 @@ States = Dict[str, tf.Tensor]
...
@@ -31,6 +32,14 @@ States = Dict[str, tf.Tensor]
Activation
=
Union
[
str
,
Callable
]
Activation
=
Union
[
str
,
Callable
]
# String constants.
class
FeatureFusion
(
str
,
enum
.
Enum
):
PYRAMID_FUSION
=
'pyramid_fusion'
PANOPTIC_FPN_FUSION
=
'panoptic_fpn_fusion'
DEEPLABV3PLUS
=
'deeplabv3plus'
DEEPLABV3PLUS_SUM_TO_MERGE
=
'deeplabv3plus_sum_to_merge'
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
SqueezeExcitationQuantized
(
class
SqueezeExcitationQuantized
(
helper
.
LayerQuantizerHelper
,
helper
.
LayerQuantizerHelper
,
...
@@ -237,10 +246,11 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
...
@@ -237,10 +246,11 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
prediction layer.
prediction layer.
upsample_factor: An `int` number to specify the upsampling factor to
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
generate finer mask. Default 1 means no upsampling is applied.
feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If
feature_fusion: One of `deeplabv3plus`, `deeplabv3plus_sum_to_merge`,
`deeplabv3plus`, features from decoder_features[level] will be fused
`pyramid_fusion`, or None. If `deeplabv3plus`, features from
with low level feature maps from backbone. If `pyramid_fusion`,
decoder_features[level] will be fused with low level feature maps from
multiscale features will be resized and fused at the target level.
backbone. If `pyramid_fusion`, multiscale features will be resized and
fused at the target level.
decoder_min_level: An `int` of minimum level from decoder to use in
decoder_min_level: An `int` of minimum level from decoder to use in
feature fusion. It is only used when feature_fusion is set to
feature fusion. It is only used when feature_fusion is set to
`panoptic_fpn_fusion`.
`panoptic_fpn_fusion`.
...
@@ -327,7 +337,9 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
...
@@ -327,7 +337,9 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
'epsilon'
:
self
.
_config_dict
[
'norm_epsilon'
],
'epsilon'
:
self
.
_config_dict
[
'norm_epsilon'
],
}
}
if
self
.
_config_dict
[
'feature_fusion'
]
==
'deeplabv3plus'
:
if
self
.
_config_dict
[
'feature_fusion'
]
in
[
FeatureFusion
.
DEEPLABV3PLUS
,
FeatureFusion
.
DEEPLABV3PLUS_SUM_TO_MERGE
]:
# Deeplabv3+ feature fusion layers.
# Deeplabv3+ feature fusion layers.
self
.
_dlv3p_conv
=
helper
.
Conv2DQuantized
(
self
.
_dlv3p_conv
=
helper
.
Conv2DQuantized
(
kernel_size
=
1
,
kernel_size
=
1
,
...
@@ -384,10 +396,12 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
...
@@ -384,10 +396,12 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
size
=
(
self
.
_config_dict
[
'upsample_factor'
],
size
=
(
self
.
_config_dict
[
'upsample_factor'
],
self
.
_config_dict
[
'upsample_factor'
]),
self
.
_config_dict
[
'upsample_factor'
]),
interpolation
=
'nearest'
)
interpolation
=
'nearest'
)
self
.
_resizing_layer
=
tf
.
keras
.
lay
er
s
.
Resizing
(
self
.
_resizing_layer
=
help
er
.
Resizing
Quantized
(
backbone_shape
[
1
],
backbone_shape
[
2
],
interpolation
=
'bilinear'
)
backbone_shape
[
1
],
backbone_shape
[
2
],
interpolation
=
'bilinear'
)
self
.
_concat_layer
=
helper
.
ConcatenateQuantized
(
axis
=
self
.
_bn_axis
)
self
.
_concat_layer
=
helper
.
ConcatenateQuantized
(
axis
=
self
.
_bn_axis
)
self
.
_add_layer
=
tfmot
.
quantization
.
keras
.
QuantizeWrapperV2
(
tf
.
keras
.
layers
.
Add
(),
configs
.
Default8BitQuantizeConfig
([],
[],
True
))
super
().
build
(
input_shape
)
super
().
build
(
input_shape
)
...
@@ -412,14 +426,16 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
...
@@ -412,14 +426,16 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
segmentation prediction mask: A `tf.Tensor` of the segmentation mask
segmentation prediction mask: A `tf.Tensor` of the segmentation mask
scores predicted from input features.
scores predicted from input features.
"""
"""
if
self
.
_config_dict
[
'feature_fusion'
]
in
(
'pyramid_fusion'
,
if
self
.
_config_dict
[
'feature_fusion'
]
in
(
'panoptic_fpn_fusion'
):
FeatureFusion
.
PYRAMID_FUSION
,
FeatureFusion
.
PANOPTIC_FPN_FUSION
):
raise
ValueError
(
raise
ValueError
(
'The feature fusion method `pyramid_fusion` is not supported in QAT.'
)
'The feature fusion method `pyramid_fusion` is not supported in QAT.'
)
backbone_output
=
inputs
[
0
]
backbone_output
=
inputs
[
0
]
decoder_output
=
inputs
[
1
]
decoder_output
=
inputs
[
1
]
if
self
.
_config_dict
[
'feature_fusion'
]
==
'deeplabv3plus'
:
if
self
.
_config_dict
[
'feature_fusion'
]
in
{
FeatureFusion
.
DEEPLABV3PLUS
,
FeatureFusion
.
DEEPLABV3PLUS_SUM_TO_MERGE
}:
# deeplabv3+ feature fusion.
# deeplabv3+ feature fusion.
x
=
decoder_output
[
str
(
self
.
_config_dict
[
'level'
])]
if
isinstance
(
x
=
decoder_output
[
str
(
self
.
_config_dict
[
'level'
])]
if
isinstance
(
decoder_output
,
dict
)
else
decoder_output
decoder_output
,
dict
)
else
decoder_output
...
@@ -429,7 +445,10 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
...
@@ -429,7 +445,10 @@ class SegmentationHeadQuantized(tf.keras.layers.Layer):
y
=
self
.
_activation_layer
(
y
)
y
=
self
.
_activation_layer
(
y
)
x
=
self
.
_resizing_layer
(
x
)
x
=
self
.
_resizing_layer
(
x
)
x
=
tf
.
cast
(
x
,
dtype
=
y
.
dtype
)
x
=
tf
.
cast
(
x
,
dtype
=
y
.
dtype
)
x
=
self
.
_concat_layer
([
x
,
y
])
if
self
.
_config_dict
[
'feature_fusion'
]
==
FeatureFusion
.
DEEPLABV3PLUS
:
x
=
self
.
_concat_layer
([
x
,
y
])
else
:
x
=
self
.
_add_layer
([
x
,
y
])
else
:
else
:
x
=
decoder_output
[
str
(
self
.
_config_dict
[
'level'
])]
if
isinstance
(
x
=
decoder_output
[
str
(
self
.
_config_dict
[
'level'
])]
if
isinstance
(
decoder_output
,
dict
)
else
decoder_output
decoder_output
,
dict
)
else
decoder_output
...
@@ -620,7 +639,7 @@ class SpatialPyramidPoolingQuantized(nn_layers.SpatialPyramidPooling):
...
@@ -620,7 +639,7 @@ class SpatialPyramidPoolingQuantized(nn_layers.SpatialPyramidPooling):
kernel_regularizer
=
self
.
_kernel_regularizer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_bias
=
False
,
use_bias
=
False
,
activation
=
helper
.
NoOpActivation
()),
activation
=
helper
.
NoOpActivation
()),
norm
_with_quantize
(
norm
(
axis
=
self
.
_bn_axis
,
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_batchnorm_momentum
,
momentum
=
self
.
_batchnorm_momentum
,
epsilon
=
self
.
_batchnorm_epsilon
)
epsilon
=
self
.
_batchnorm_epsilon
)
...
@@ -649,7 +668,7 @@ class SpatialPyramidPoolingQuantized(nn_layers.SpatialPyramidPooling):
...
@@ -649,7 +668,7 @@ class SpatialPyramidPoolingQuantized(nn_layers.SpatialPyramidPooling):
x
=
self
.
_concat_layer
(
result
)
x
=
self
.
_concat_layer
(
result
)
for
layer
in
self
.
_projection
:
for
layer
in
self
.
_projection
:
x
=
layer
(
x
,
training
=
training
)
x
=
layer
(
x
,
training
=
training
)
x
=
self
.
_activation_fn
_no_quant
(
x
)
x
=
self
.
_activation_fn
(
x
)
return
self
.
_dropout_layer
(
x
)
return
self
.
_dropout_layer
(
x
)
...
...
official/projects/qat/vision/modeling/layers/nn_layers_test.py
View file @
44f6d511
...
@@ -24,12 +24,15 @@ from official.projects.qat.vision.modeling.layers import nn_layers
...
@@ -24,12 +24,15 @@ from official.projects.qat.vision.modeling.layers import nn_layers
class
NNLayersTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
class
NNLayersTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
@
parameterized
.
parameters
(
(
'deeplabv3plus'
,
1
),
(
'deeplabv3plus'
,
1
,
128
,
128
),
(
'deeplabv3plus'
,
2
),
(
'deeplabv3plus'
,
2
,
128
,
128
),
(
'deeplabv3'
,
1
),
(
'deeplabv3'
,
1
,
128
,
64
),
(
'deeplabv3'
,
2
),
(
'deeplabv3'
,
2
,
128
,
64
),
(
'deeplabv3plus_sum_to_merge'
,
1
,
64
,
128
),
(
'deeplabv3plus_sum_to_merge'
,
2
,
64
,
128
),
)
)
def
test_segmentation_head_creation
(
self
,
feature_fusion
,
upsample_factor
):
def
test_segmentation_head_creation
(
self
,
feature_fusion
,
upsample_factor
,
low_level_num_filters
,
expected_shape
):
input_size
=
128
input_size
=
128
decoder_outupt_size
=
input_size
//
2
decoder_outupt_size
=
input_size
//
2
...
@@ -42,14 +45,11 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
...
@@ -42,14 +45,11 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
level
=
4
,
level
=
4
,
upsample_factor
=
upsample_factor
,
upsample_factor
=
upsample_factor
,
low_level
=
2
,
low_level
=
2
,
low_level_num_filters
=
128
,
low_level_num_filters
=
low_level_num_filters
,
feature_fusion
=
feature_fusion
)
feature_fusion
=
feature_fusion
)
features
=
segmentation_head
((
backbone_output
,
decoder_output
))
features
=
segmentation_head
((
backbone_output
,
decoder_output
))
expected_shape
=
(
input_size
if
feature_fusion
==
'deeplabv3plus'
else
decoder_outupt_size
)
self
.
assertAllEqual
([
self
.
assertAllEqual
([
2
,
expected_shape
*
upsample_factor
,
expected_shape
*
upsample_factor
,
5
2
,
expected_shape
*
upsample_factor
,
expected_shape
*
upsample_factor
,
5
],
features
.
shape
.
as_list
())
],
features
.
shape
.
as_list
())
...
...
official/projects/qat/vision/quantization/__init__.py
View file @
44f6d511
...
@@ -13,6 +13,3 @@
...
@@ -13,6 +13,3 @@
# limitations under the License.
# limitations under the License.
"""Configs package definition."""
"""Configs package definition."""
from
official.projects.qat.vision.quantization
import
configs
from
official.projects.qat.vision.quantization
import
schemes
official/projects/qat/vision/quantization/layer_transforms.py
0 → 100644
View file @
44f6d511
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains custom quantization layer transforms."""
from
typing
import
Type
,
Mapping
import
tensorflow
as
tf
import
tensorflow_model_optimization
as
tfmot
from
official.projects.qat.vision.modeling.layers
import
nn_blocks
as
quantized_nn_blocks
from
official.projects.qat.vision.modeling.layers
import
nn_layers
as
quantized_nn_layers
from
official.projects.qat.vision.quantization
import
configs
# Short alias for the Keras API bundled with TensorFlow.
keras = tf.keras

# Aliases for the TFMOT graph-transformation node/pattern types used below.
LayerNode = tfmot.quantization.keras.graph_transformations.transforms.LayerNode
LayerPattern = tfmot.quantization.keras.graph_transformations.transforms.LayerPattern

# Registered class names of custom layers that carry their own internal
# quantization, so the transform attaches a NoOpQuantizeConfig to them.
_LAYER_NAMES = [
    'Vision>Conv2DBNBlock',
    'Vision>InvertedBottleneckBlock',
    'Vision>SegmentationHead',
    'Vision>SpatialPyramidPooling',
    'Vision>ASPP',
]

# Variable name suffixes created by QAT wrappers (quantizer ranges and
# bookkeeping) — these have no counterpart in the float model's weights.
_QUANTIZATION_WEIGHT_NAMES = [
    'output_max',
    'output_min',
    'optimizer_step',
    'kernel_min',
    'kernel_max',
    'add_three_min',
    'add_three_max',
    'divide_six_min',
    'divide_six_max',
    'depthwise_kernel_min',
    'depthwise_kernel_max',
    'reduce_mean_quantizer_vars_min',
    'reduce_mean_quantizer_vars_max',
]

# Variable name suffixes present in the original (float) layer whose values
# must be copied over into the quantized replacement layer.
_ORIGINAL_WEIGHT_NAME = [
    'kernel',
    'depthwise_kernel',
    'gamma',
    'beta',
    'moving_mean',
    'moving_variance',
    'bias',
]
class CustomLayerQuantize(
    tfmot.quantization.keras.graph_transformations.transforms.Transform):
  """Add QAT support for Keras Custom layer.

  Matches a single layer by its registered class-name pattern and replaces
  it with the corresponding quantized implementation, carrying the float
  layer's weights over into the replacement while keeping the quantizer
  variables freshly initialized.
  """

  def __init__(self, original_layer_pattern: str,
               quantized_layer_class: Type[keras.layers.Layer]):
    super(CustomLayerQuantize, self).__init__()
    # Registered name of the float layer to match, e.g. 'Vision>ASPP'.
    self._original_layer_pattern = original_layer_pattern
    # Class used to build the quantized replacement layer.
    self._quantized_layer_class = quantized_layer_class

  def pattern(self) -> LayerPattern:
    """See base class."""
    return LayerPattern(self._original_layer_pattern)

  def _is_quantization_weight_name(self, name):
    # Strip the scope path ('a/b/kernel:0' -> 'kernel') before classifying.
    simple_name = name.split('/')[-1].split(':')[0]
    if simple_name in _QUANTIZATION_WEIGHT_NAMES:
      return True
    if simple_name in _ORIGINAL_WEIGHT_NAME:
      return False
    # An unrecognized variable means the weight-copy below would silently
    # mis-align, so fail loudly instead.
    raise ValueError('Variable name {} is not supported on '
                     'CustomLayerQuantize({}) transform.'.format(
                         simple_name, self._original_layer_pattern))

  def _create_layer_metadata(
      self, layer_class_name: str
  ) -> Mapping[str, tfmot.quantization.keras.QuantizeConfig]:
    # Layers listed in _LAYER_NAMES quantize themselves internally, so the
    # wrapper must not add another output quantizer.
    if layer_class_name in _LAYER_NAMES:
      return {'quantize_config': configs.NoOpQuantizeConfig()}
    return {
        'quantize_config': configs.Default8BitOutputQuantizeConfig()
    }

  def replacement(self, match_layer: LayerNode) -> LayerNode:
    """See base class."""
    float_layer = match_layer.layer
    float_config = float_layer['config']
    float_names_and_weights = list(match_layer.names_and_weights)

    # Build the quantized twin from the float layer's own config.
    replacement_layer = self._quantized_layer_class(**float_config)

    dummy_input_shape = [1, 64, 128, 1]
    # SegmentationHead layer requires a tuple of 2 tensors.
    if isinstance(replacement_layer,
                  quantized_nn_layers.SegmentationHeadQuantized):
      dummy_input_shape = ([1, 1, 1, 1], [1, 1, 1, 1])
    # Forces variable creation so the weight lists below are populated.
    replacement_layer.compute_output_shape(dummy_input_shape)

    replacement_names = [weight.name for weight in replacement_layer.weights]
    replacement_values = replacement_layer.get_weights()

    # Walk the quantized layer's variables in order: quantizer variables
    # keep their fresh values, every other variable takes the next float
    # weight. Relies on both layers creating weights in the same order.
    consumed = 0
    names_and_weights = []
    for pair in zip(replacement_names, replacement_values):
      if not self._is_quantization_weight_name(name=pair[0]):
        pair = float_names_and_weights[consumed]
        consumed += 1
      names_and_weights.append(pair)

    if consumed != len(float_names_and_weights):
      # Some float weights were left behind — the mapping is broken.
      raise ValueError('{}/{} of Bottleneck weights is transformed.'.format(
          consumed, len(float_names_and_weights)))

    replacement_config = keras.layers.serialize(replacement_layer)
    # Keep the original layer's graph name so references stay valid.
    replacement_config['name'] = replacement_config['config']['name']
    layer_metadata = self._create_layer_metadata(float_layer['class_name'])
    return LayerNode(
        replacement_config,
        metadata=layer_metadata,
        names_and_weights=names_and_weights)
# One transform per supported custom Model Garden layer: each swaps the
# float block for its quantized counterpart during QAT model conversion.
CUSTOM_TRANSFORMS = [
    CustomLayerQuantize('Vision>BottleneckBlock',
                        quantized_nn_blocks.BottleneckBlockQuantized),
    CustomLayerQuantize('Vision>InvertedBottleneckBlock',
                        quantized_nn_blocks.InvertedBottleneckBlockQuantized),
    CustomLayerQuantize('Vision>Conv2DBNBlock',
                        quantized_nn_blocks.Conv2DBNBlockQuantized),
    CustomLayerQuantize('Vision>SegmentationHead',
                        quantized_nn_layers.SegmentationHeadQuantized),
    CustomLayerQuantize('Vision>SpatialPyramidPooling',
                        quantized_nn_layers.SpatialPyramidPoolingQuantized),
    CustomLayerQuantize('Vision>ASPP',
                        quantized_nn_layers.ASPPQuantized)
]
official/projects/qat/vision/quantization/schemes.py
View file @
44f6d511
...
@@ -13,107 +13,13 @@
...
@@ -13,107 +13,13 @@
# limitations under the License.
# limitations under the License.
"""Quantization schemes."""
"""Quantization schemes."""
from
typing
import
Type
# Import libraries
# Import libraries
import
tensorflow
as
tf
import
tensorflow_model_optimization
as
tfmot
import
tensorflow_model_optimization
as
tfmot
from
official.projects.qat.vision.modeling.layers
import
nn_blocks
as
quantized_nn_blocks
from
official.projects.qat.vision.quantization
import
layer_transforms
from
official.projects.qat.vision.modeling.layers
import
nn_layers
as
quantized_nn_layers
from
official.projects.qat.vision.quantization
import
configs
keras
=
tf
.
keras
default_8bit_transforms
=
tfmot
.
quantization
.
keras
.
default_8bit
.
default_8bit_transforms
default_8bit_transforms
=
tfmot
.
quantization
.
keras
.
default_8bit
.
default_8bit_transforms
LayerNode
=
tfmot
.
quantization
.
keras
.
graph_transformations
.
transforms
.
LayerNode
LayerPattern
=
tfmot
.
quantization
.
keras
.
graph_transformations
.
transforms
.
LayerPattern
_QUANTIZATION_WEIGHT_NAMES
=
[
'output_max'
,
'output_min'
,
'optimizer_step'
,
'kernel_min'
,
'kernel_max'
,
'add_three_min'
,
'add_three_max'
,
'divide_six_min'
,
'divide_six_max'
,
'depthwise_kernel_min'
,
'depthwise_kernel_max'
,
'reduce_mean_quantizer_vars_min'
,
'reduce_mean_quantizer_vars_max'
]
_ORIGINAL_WEIGHT_NAME
=
[
'kernel'
,
'depthwise_kernel'
,
'gamma'
,
'beta'
,
'moving_mean'
,
'moving_variance'
,
'bias'
]
class
CustomLayerQuantize
(
tfmot
.
quantization
.
keras
.
graph_transformations
.
transforms
.
Transform
):
"""Add QAT support for Keras Custom layer."""
def
__init__
(
self
,
original_layer_pattern
:
str
,
quantized_layer_class
:
Type
[
keras
.
layers
.
Layer
]):
super
(
CustomLayerQuantize
,
self
).
__init__
()
self
.
_original_layer_pattern
=
original_layer_pattern
self
.
_quantized_layer_class
=
quantized_layer_class
def
pattern
(
self
)
->
LayerPattern
:
"""See base class."""
return
LayerPattern
(
self
.
_original_layer_pattern
)
def
_is_quantization_weight_name
(
self
,
name
):
simple_name
=
name
.
split
(
'/'
)[
-
1
].
split
(
':'
)[
0
]
if
simple_name
in
_QUANTIZATION_WEIGHT_NAMES
:
return
True
if
simple_name
in
_ORIGINAL_WEIGHT_NAME
:
return
False
raise
ValueError
(
'Variable name {} is not supported on '
'CustomLayerQuantize({}) transform.'
.
format
(
simple_name
,
self
.
_original_layer_pattern
))
def
replacement
(
self
,
match_layer
:
LayerNode
)
->
LayerNode
:
"""See base class."""
bottleneck_layer
=
match_layer
.
layer
bottleneck_config
=
bottleneck_layer
[
'config'
]
bottleneck_names_and_weights
=
list
(
match_layer
.
names_and_weights
)
quantized_layer
=
self
.
_quantized_layer_class
(
**
bottleneck_config
)
dummy_input_shape
=
[
1
,
64
,
128
,
1
]
# SegmentationHead layer requires a tuple of 2 tensors.
if
isinstance
(
quantized_layer
,
quantized_nn_layers
.
SegmentationHeadQuantized
):
dummy_input_shape
=
([
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
])
quantized_layer
.
compute_output_shape
(
dummy_input_shape
)
quantized_names_and_weights
=
zip
(
[
weight
.
name
for
weight
in
quantized_layer
.
weights
],
quantized_layer
.
get_weights
())
match_idx
=
0
names_and_weights
=
[]
for
name_and_weight
in
quantized_names_and_weights
:
if
not
self
.
_is_quantization_weight_name
(
name
=
name_and_weight
[
0
]):
name_and_weight
=
bottleneck_names_and_weights
[
match_idx
]
match_idx
=
match_idx
+
1
names_and_weights
.
append
(
name_and_weight
)
if
match_idx
!=
len
(
bottleneck_names_and_weights
):
raise
ValueError
(
'{}/{} of Bottleneck weights is transformed.'
.
format
(
match_idx
,
len
(
bottleneck_names_and_weights
)))
quantized_layer_config
=
keras
.
layers
.
serialize
(
quantized_layer
)
quantized_layer_config
[
'name'
]
=
quantized_layer_config
[
'config'
][
'name'
]
if
bottleneck_layer
[
'class_name'
]
in
[
'Vision>Conv2DBNBlock'
,
'Vision>InvertedBottleneckBlock'
,
'Vision>SegmentationHead'
,
'Vision>SpatialPyramidPooling'
,
'Vision>ASPP'
]:
layer_metadata
=
{
'quantize_config'
:
configs
.
NoOpQuantizeConfig
()}
else
:
layer_metadata
=
{
'quantize_config'
:
configs
.
Default8BitOutputQuantizeConfig
()
}
return
LayerNode
(
quantized_layer_config
,
metadata
=
layer_metadata
,
names_and_weights
=
names_and_weights
)
class
QuantizeLayoutTransform
(
class
QuantizeLayoutTransform
(
...
@@ -155,20 +61,9 @@ class QuantizeLayoutTransform(
...
@@ -155,20 +61,9 @@ class QuantizeLayoutTransform(
default_8bit_transforms
.
ConcatTransform3Inputs
(),
default_8bit_transforms
.
ConcatTransform3Inputs
(),
default_8bit_transforms
.
ConcatTransform
(),
default_8bit_transforms
.
ConcatTransform
(),
default_8bit_transforms
.
LayerReLUQuantize
(),
default_8bit_transforms
.
LayerReLUQuantize
(),
default_8bit_transforms
.
LayerReluActivationQuantize
(),
default_8bit_transforms
.
LayerReluActivationQuantize
()
CustomLayerQuantize
(
'Vision>BottleneckBlock'
,
quantized_nn_blocks
.
BottleneckBlockQuantized
),
CustomLayerQuantize
(
'Vision>InvertedBottleneckBlock'
,
quantized_nn_blocks
.
InvertedBottleneckBlockQuantized
),
CustomLayerQuantize
(
'Vision>Conv2DBNBlock'
,
quantized_nn_blocks
.
Conv2DBNBlockQuantized
),
CustomLayerQuantize
(
'Vision>SegmentationHead'
,
quantized_nn_layers
.
SegmentationHeadQuantized
),
CustomLayerQuantize
(
'Vision>SpatialPyramidPooling'
,
quantized_nn_layers
.
SpatialPyramidPoolingQuantized
),
CustomLayerQuantize
(
'Vision>ASPP'
,
quantized_nn_layers
.
ASPPQuantized
)
]
]
transforms
+=
layer_transforms
.
CUSTOM_TRANSFORMS
return
tfmot
.
quantization
.
keras
.
graph_transformations
.
model_transformer
.
ModelTransformer
(
return
tfmot
.
quantization
.
keras
.
graph_transformations
.
model_transformer
.
ModelTransformer
(
model
,
transforms
,
model
,
transforms
,
set
(
layer_quantize_map
.
keys
()),
layer_quantize_map
).
transform
()
set
(
layer_quantize_map
.
keys
()),
layer_quantize_map
).
transform
()
...
...
official/projects/qat/vision/tasks/image_classification_test.py
View file @
44f6d511
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
"""Tests for image classification task."""
"""Tests for image classification task."""
# pylint: disable=unused-import
# pylint: disable=unused-import
import
os
from
absl.testing
import
parameterized
from
absl.testing
import
parameterized
import
orbit
import
orbit
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -23,16 +25,31 @@ from official import vision
...
@@ -23,16 +25,31 @@ from official import vision
from
official.core
import
exp_factory
from
official.core
import
exp_factory
from
official.modeling
import
optimization
from
official.modeling
import
optimization
from
official.projects.qat.vision.tasks
import
image_classification
as
img_cls_task
from
official.projects.qat.vision.tasks
import
image_classification
as
img_cls_task
from
official.vision.dataloaders
import
tfexample_utils
class
ImageClassificationTaskTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
class
ImageClassificationTaskTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
def
_create_test_tfrecord
(
self
,
tfrecord_file
,
example
,
num_samples
):
examples
=
[
example
]
*
num_samples
tfexample_utils
.
dump_to_tfrecord
(
record_file
=
tfrecord_file
,
tf_examples
=
examples
)
@
parameterized
.
parameters
((
'resnet_imagenet_qat'
),
@
parameterized
.
parameters
((
'resnet_imagenet_qat'
),
(
'mobilenet_imagenet_qat'
))
(
'mobilenet_imagenet_qat'
))
def
test_task
(
self
,
config_name
):
def
test_task
(
self
,
config_name
):
input_image_size
=
[
224
,
224
]
test_tfrecord_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'cls_test.tfrecord'
)
example
=
tf
.
train
.
Example
.
FromString
(
tfexample_utils
.
create_classification_example
(
image_height
=
input_image_size
[
0
],
image_width
=
input_image_size
[
1
]))
self
.
_create_test_tfrecord
(
tfrecord_file
=
test_tfrecord_file
,
example
=
example
,
num_samples
=
10
)
config
=
exp_factory
.
get_exp_config
(
config_name
)
config
=
exp_factory
.
get_exp_config
(
config_name
)
config
.
task
.
train_data
.
global_batch_size
=
2
config
.
task
.
train_data
.
global_batch_size
=
2
config
.
task
.
validation_data
.
input_path
=
test_tfrecord_file
config
.
task
.
train_data
.
input_path
=
test_tfrecord_file
task
=
img_cls_task
.
ImageClassificationTask
(
config
.
task
)
task
=
img_cls_task
.
ImageClassificationTask
(
config
.
task
)
model
=
task
.
build_model
()
model
=
task
.
build_model
()
metrics
=
task
.
build_metrics
()
metrics
=
task
.
build_metrics
()
...
...
official/projects/qat/vision/tasks/retinanet_test.py
View file @
44f6d511
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
"""Tests for RetinaNet task."""
"""Tests for RetinaNet task."""
# pylint: disable=unused-import
# pylint: disable=unused-import
import
os
from
absl.testing
import
parameterized
from
absl.testing
import
parameterized
import
orbit
import
orbit
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -23,16 +25,31 @@ from official.core import exp_factory
...
@@ -23,16 +25,31 @@ from official.core import exp_factory
from
official.modeling
import
optimization
from
official.modeling
import
optimization
from
official.projects.qat.vision.tasks
import
retinanet
from
official.projects.qat.vision.tasks
import
retinanet
from
official.vision.configs
import
retinanet
as
exp_cfg
from
official.vision.configs
import
retinanet
as
exp_cfg
from
official.vision.dataloaders
import
tfexample_utils
class
RetinaNetTaskTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
class
RetinaNetTaskTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
def
_create_test_tfrecord
(
self
,
tfrecord_file
,
example
,
num_samples
):
examples
=
[
example
]
*
num_samples
tfexample_utils
.
dump_to_tfrecord
(
record_file
=
tfrecord_file
,
tf_examples
=
examples
)
@
parameterized
.
parameters
(
@
parameterized
.
parameters
(
(
'retinanet_spinenet_mobile_coco_qat'
,
True
),
(
'retinanet_spinenet_mobile_coco_qat'
,
True
),
(
'retinanet_spinenet_mobile_coco_qat'
,
False
),
(
'retinanet_spinenet_mobile_coco_qat'
,
False
),
)
)
def
test_retinanet_task
(
self
,
test_config
,
is_training
):
def
test_retinanet_task
(
self
,
test_config
,
is_training
):
"""RetinaNet task test for training and val using toy configs."""
"""RetinaNet task test for training and val using toy configs."""
input_image_size
=
[
384
,
384
]
test_tfrecord_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'det_test.tfrecord'
)
example
=
tfexample_utils
.
create_detection_test_example
(
image_height
=
input_image_size
[
0
],
image_width
=
input_image_size
[
1
],
image_channel
=
3
,
num_instances
=
10
)
self
.
_create_test_tfrecord
(
tfrecord_file
=
test_tfrecord_file
,
example
=
example
,
num_samples
=
10
)
config
=
exp_factory
.
get_exp_config
(
test_config
)
config
=
exp_factory
.
get_exp_config
(
test_config
)
# modify config to suit local testing
# modify config to suit local testing
config
.
task
.
model
.
input_size
=
[
128
,
128
,
3
]
config
.
task
.
model
.
input_size
=
[
128
,
128
,
3
]
...
@@ -41,6 +58,9 @@ class RetinaNetTaskTest(parameterized.TestCase, tf.test.TestCase):
...
@@ -41,6 +58,9 @@ class RetinaNetTaskTest(parameterized.TestCase, tf.test.TestCase):
config
.
task
.
validation_data
.
global_batch_size
=
1
config
.
task
.
validation_data
.
global_batch_size
=
1
config
.
task
.
train_data
.
shuffle_buffer_size
=
2
config
.
task
.
train_data
.
shuffle_buffer_size
=
2
config
.
task
.
validation_data
.
shuffle_buffer_size
=
2
config
.
task
.
validation_data
.
shuffle_buffer_size
=
2
config
.
task
.
validation_data
.
input_path
=
test_tfrecord_file
config
.
task
.
train_data
.
input_path
=
test_tfrecord_file
config
.
task
.
annotation_file
=
None
config
.
train_steps
=
1
config
.
train_steps
=
1
task
=
retinanet
.
RetinaNetTask
(
config
.
task
)
task
=
retinanet
.
RetinaNetTask
(
config
.
task
)
...
...
official/projects/teams/t
eams_experiments_test
.py
→
official/projects/teams/t
rain
.py
View file @
44f6d511
...
@@ -12,24 +12,17 @@
...
@@ -12,24 +12,17 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""Te
sts for teams_experiment
s."""
"""Te
nsorFlow Model Garden Teams training driver, register Teams config
s."""
from
absl.testing
import
parameterized
# pylint: disable=unused-import
import
tensorflow
as
tf
from
absl
import
app
from
official.common
import
registry_imports
# pylint: disable=unused-import
from
official.core
import
config_definitions
as
cfg
from
official.core
import
exp_factory
class
TeamsExperimentsTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
@
parameterized
.
parameters
((
'teams/pretraining'
,))
def
test_teams_experiments
(
self
,
config_name
):
config
=
exp_factory
.
get_exp_config
(
config_name
)
self
.
assertIsInstance
(
config
,
cfg
.
ExperimentConfig
)
self
.
assertIsInstance
(
config
.
task
.
train_data
,
cfg
.
DataConfig
)
from
official.common
import
flags
as
tfm_flags
from
official.nlp
import
tasks
from
official.nlp
import
train
from
official.projects.teams
import
teams_experiments
from
official.projects.teams
import
teams_task
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tfm_flags
.
define_flags
()
app
.
run
(
train
.
main
)
official/projects/vit/modeling/vit.py
View file @
44f6d511
...
@@ -13,6 +13,8 @@
...
@@ -13,6 +13,8 @@
# limitations under the License.
# limitations under the License.
"""VisionTransformer models."""
"""VisionTransformer models."""
import
immutabledict
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.modeling
import
activations
from
official.modeling
import
activations
...
@@ -23,7 +25,7 @@ from official.vision.modeling.layers import nn_layers
...
@@ -23,7 +25,7 @@ from official.vision.modeling.layers import nn_layers
layers
=
tf
.
keras
.
layers
layers
=
tf
.
keras
.
layers
VIT_SPECS
=
{
VIT_SPECS
=
immutabledict
.
immutabledict
(
{
'vit-ti16'
:
'vit-ti16'
:
dict
(
dict
(
hidden_size
=
192
,
hidden_size
=
192
,
...
@@ -72,7 +74,7 @@ VIT_SPECS = {
...
@@ -72,7 +74,7 @@ VIT_SPECS = {
patch_size
=
14
,
patch_size
=
14
,
transformer
=
dict
(
mlp_dim
=
8192
,
num_heads
=
16
,
num_layers
=
48
),
transformer
=
dict
(
mlp_dim
=
8192
,
num_heads
=
16
,
num_layers
=
48
),
),
),
}
}
)
class
AddPositionEmbs
(
tf
.
keras
.
layers
.
Layer
):
class
AddPositionEmbs
(
tf
.
keras
.
layers
.
Layer
):
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment