Unverified Commit 7479dbb8 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

parents 8b60a5a8 9c8cbd0c
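# --experiment_type=resnet_imagenet_qat (assumed; matches the factory registration below)
# ResNet-50 on ImageNet, default 8-bit QAT, stepwise learning-rate schedule.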
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 256
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 256
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/resnet_classifier_gpu/ckpt-56160'
trainer:
  # With the settings below, QAT reaches a Top-1 accuracy of 0.7720 after 5 days of training
  # with 8 GPUs, which is higher than the non-quantized float32 ResNet.
train_steps: 449280
validation_steps: 200
validation_interval: 5000
steps_per_loop: 5000
summary_interval: 5000
checkpoint_interval: 5000
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'stepwise'
stepwise:
boundaries: [150000, 300000, 400000]
values: [0.08, 0.008, 0.0008, 0.00008]
warmup:
type: 'linear'
linear:
warmup_steps: 40000
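# --experiment_type=resnet_imagenet_qat (assumed)
# ResNet-50 on ImageNet, default 8-bit QAT, exponential learning-rate schedule.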
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 256
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 256
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/resnet_classifier_gpu/ckpt-56160'
trainer:
  # With the settings below, QAT reaches the accuracy of the non-quantized float32 version
  # after around 160k steps, which takes 1d 15h with 8 GPUs.
train_steps: 449280
validation_steps: 200
validation_interval: 5000
steps_per_loop: 5000
summary_interval: 5000
checkpoint_interval: 5000
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'exponential'
exponential:
initial_learning_rate: 0.016
decay_steps: 25000
decay_rate: 0.5
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 1000
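# --experiment_type=resnet_imagenet_qat (assumed)
# ResNet-50 on ImageNet, QAT with 4-bit weights and 4-bit activations.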
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 256
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 256
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/resnet_classifier_gpu/ckpt-56160'
change_num_bits: true
num_bits_weight: 4
num_bits_activation: 4
trainer:
  # With the settings below, QAT reaches a Top-1 accuracy of 0.6822 at 205k steps with 8 GPUs.
  # TODO: Update the configs when training is done.
train_steps: 449280
validation_steps: 200
validation_interval: 5000
steps_per_loop: 5000
summary_interval: 5000
checkpoint_interval: 5000
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'exponential'
exponential:
initial_learning_rate: 0.016
decay_steps: 25000
decay_rate: 0.5
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 1000
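# --experiment_type=resnet_imagenet_qat (assumed)
# ResNet-50 on ImageNet, QAT with 4-bit weights and 8-bit activations.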
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 256
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 256
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/resnet_classifier_gpu/ckpt-56160'
change_num_bits: true
num_bits_weight: 4
num_bits_activation: 8
trainer:
  # With the settings below, QAT reaches a Top-1 accuracy of 0.7575 at 220k steps with 8 GPUs.
  # TODO: Update the configs when training is done.
train_steps: 449280
validation_steps: 200
validation_interval: 5000
steps_per_loop: 5000
summary_interval: 5000
checkpoint_interval: 5000
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'exponential'
exponential:
initial_learning_rate: 0.016
decay_steps: 25000
decay_rate: 0.5
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 1000
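# --experiment_type=resnet_imagenet_qat (assumed)
# ResNet-50 on ImageNet, QAT with 6-bit weights and 6-bit activations.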
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 256
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 256
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/resnet_classifier_gpu/ckpt-56160'
change_num_bits: true
num_bits_weight: 6
num_bits_activation: 6
trainer:
  # With the settings below, QAT reaches a Top-1 accuracy of 0.7607 at 190k steps with 8 GPUs.
  # TODO: Update the configs when training is done.
train_steps: 449280
validation_steps: 200
validation_interval: 5000
steps_per_loop: 5000
summary_interval: 5000
checkpoint_interval: 5000
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'exponential'
exponential:
initial_learning_rate: 0.016
decay_steps: 25000
decay_rate: 0.5
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 1000
# --experiment_type=retinanet_spinenet_mobile_coco_qat
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet_mobile:
stochastic_depth_drop_rate: 0.2
model_id: '49'
se_ratio: 0.2
use_keras_upsampling_2d: true
type: 'spinenet_mobile'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 48
use_separable_conv: true
input_size: [384, 384, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'float32'
global_batch_size: 128
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'float32'
global_batch_size: 8
is_training: false
quantization:
pretrained_original_checkpoint: 'gs://**/coco_spinenet49_mobile_tpu/ckpt-277200'
trainer:
checkpoint_interval: 924
optimizer_config:
learning_rate:
stepwise:
boundaries: [531300, 545160]
values: [0.0016, 0.00016, 0.000016]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0000335
warmup_steps: 4000
steps_per_loop: 924
train_steps: 554400
validation_interval: 924
validation_steps: 1250
summary_interval: 924
# --experiment_type=mnv2_deeplabv3_pascal_qat
# Use 8 V100 GPUs for training and 4 V100 GPUs for eval.
# mIoU (unquantized fp32): 74.78
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 21
input_size: [512, 512, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV2'
output_stride: 16
decoder:
aspp:
dilation_rates: []
level: 4
pool_kernel_size: null
output_tensor: true
type: 'aspp'
head:
feature_fusion: null
num_convs: 0
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
losses:
l2_weight_decay: 4.0e-07 # 1/100 of original value.
train_data:
output_size: [512, 512]
crop_size: [512, 512]
input_path: 'gs://**/pascal_voc_seg/train_aug*'
is_training: true
global_batch_size: 16
dtype: 'float32'
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
output_size: [512, 512]
input_path: 'gs://**/pascal_voc_seg/val*'
is_training: false
global_batch_size: 16
dtype: 'float32'
drop_remainder: false
resize_eval_groundtruth: false
groundtruth_padded_size: [512, 512]
quantization:
pretrained_original_checkpoint: 'gs://**/deeplabv3_mobilenetv2_pascal_coco_0.21/29808901/best_ckpt/best_ckpt-54'
init_checkpoint: null
trainer:
optimizer_config:
learning_rate:
polynomial:
decay_steps: 13240
initial_learning_rate: 0.00007 # 1/100 of original lr.
power: 0.9
type: polynomial
optimizer:
sgd:
momentum: 0.9
type: sgd
warmup:
linear:
name: linear
warmup_steps: 0 # No warmup
type: linear
best_checkpoint_eval_metric: 'mean_iou'
best_checkpoint_export_subdir: 'best_ckpt'
best_checkpoint_metric_comp: 'higher'
steps_per_loop: 662
summary_interval: 662
train_steps: 13240
validation_interval: 662
validation_steps: 90
checkpoint_interval: 662
# --experiment_type=mnv2_deeplabv3_pascal_qat
# Use 4x2 DF for training and eval.
# mIoU (unquantized fp32): 74.69
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 21
input_size: [512, 512, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV2'
output_stride: 16
decoder:
aspp:
dilation_rates: []
level: 4
pool_kernel_size: null
output_tensor: true
type: 'aspp'
head:
feature_fusion: null
num_convs: 0
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
losses:
l2_weight_decay: 4.0e-07 # 1/100 of original value.
train_data:
output_size: [512, 512]
crop_size: [512, 512]
input_path: 'gs://**/pascal_voc_seg/train_aug*'
is_training: true
global_batch_size: 16
dtype: 'float32'
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
output_size: [512, 512]
input_path: 'gs://**/pascal_voc_seg/val*'
is_training: false
global_batch_size: 16
dtype: 'float32'
drop_remainder: false
resize_eval_groundtruth: false
groundtruth_padded_size: [512, 512]
quantization:
pretrained_original_checkpoint: 'gs://**/deeplabv3_mobilenetv2_pascal_coco_0.21/29808901/best_ckpt/best_ckpt-54'
init_checkpoint: null
trainer:
optimizer_config:
learning_rate:
polynomial:
decay_steps: 13240
initial_learning_rate: 0.00007 # 1/100 of original lr.
power: 0.9
type: polynomial
optimizer:
sgd:
momentum: 0.9
type: sgd
warmup:
linear:
name: linear
warmup_steps: 0 # No warmup
type: linear
best_checkpoint_eval_metric: 'mean_iou'
best_checkpoint_export_subdir: 'best_ckpt'
best_checkpoint_metric_comp: 'higher'
steps_per_loop: 662
summary_interval: 662
train_steps: 13240
validation_interval: 662
validation_steps: 90
checkpoint_interval: 662
# --experiment_type=mnv2_deeplabv3plus_cityscapes_qat
# Use 4x2 DF for training and eval.
# mIoU (unquantized fp32): 73.84
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 19
input_size: [1024, 2048, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV2'
output_stride: 16
output_intermediate_endpoints: true
decoder:
aspp:
dilation_rates: []
level: 4
pool_kernel_size: [512, 1024]
output_tensor: true
type: 'aspp'
head:
feature_fusion: 'deeplabv3plus'
low_level: '2/depthwise'
low_level_num_filters: 48
level: 4
num_convs: 2
use_depthwise_convolution: true
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
losses:
l2_weight_decay: 4.0e-07 # 1/100 of original value.
train_data:
output_size: [1024, 2048]
crop_size: []
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'train'
is_training: true
global_batch_size: 16
dtype: 'float32'
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
output_size: [1024, 2048]
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'validation'
is_training: false
global_batch_size: 16
dtype: 'float32'
drop_remainder: false
resize_eval_groundtruth: true
quantization:
pretrained_original_checkpoint: 'gs://**/deeplabv3plus_mobilenetv2_cityscapes/29814723/best_ckpt/best_ckpt-408'
init_checkpoint: null
trainer:
optimizer_config:
learning_rate:
polynomial:
decay_steps: 20000
initial_learning_rate: 0.0001 # 1/100 of original lr.
power: 0.9
type: polynomial
optimizer:
sgd:
momentum: 0.9
type: sgd
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 0 # No warmup
type: linear
steps_per_loop: 185
summary_interval: 185
train_steps: 20000
validation_interval: 185
validation_steps: 31
checkpoint_interval: 185
best_checkpoint_export_subdir: 'best_ckpt'
best_checkpoint_eval_metric: 'mean_iou'
best_checkpoint_metric_comp: 'higher'
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Image classification configuration definition."""
import dataclasses
from typing import Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.vision.beta.configs import image_classification
@dataclasses.dataclass
class ImageClassificationTask(image_classification.ImageClassificationTask):
quantization: Optional[common.Quantization] = None
@exp_factory.register_config_factory('resnet_imagenet_qat')
def image_classification_imagenet() -> cfg.ExperimentConfig:
"""Builds an image classification config for the resnet with QAT."""
config = image_classification.image_classification_imagenet()
task = ImageClassificationTask.from_args(
quantization=common.Quantization(), **config.task.as_dict())
config.task = task
return config
@exp_factory.register_config_factory('mobilenet_imagenet_qat')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
"""Builds an image classification config for the mobilenetV2 with QAT."""
config = image_classification.image_classification_imagenet_mobilenet()
task = ImageClassificationTask.from_args(
quantization=common.Quantization(), **config.task.as_dict())
config.task = task
return config
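
# A hedged usage sketch (illustrative, not part of the original change): fetch
# the registered experiment through the factory and flip the n-bit quantization
# fields, mirroring the 4-bit YAML configs above. The names come from this
# change; the override values are assumptions.
if __name__ == '__main__':
  demo_config = exp_factory.get_exp_config('resnet_imagenet_qat')
  demo_config.task.quantization.change_num_bits = True
  demo_config.task.quantization.num_bits_weight = 4
  demo_config.task.quantization.num_bits_activation = 4
  demo_config.validate()
  print(demo_config.task.quantization)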
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for image_classification."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.configs import image_classification as qat_exp_cfg
from official.vision import beta
from official.vision.beta.configs import image_classification as exp_cfg
class ImageClassificationConfigTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
('resnet_imagenet_qat',),
('mobilenet_imagenet_qat',),
)
def test_image_classification_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, qat_exp_cfg.ImageClassificationTask)
self.assertIsInstance(config.task.model,
exp_cfg.ImageClassificationModel)
self.assertIsInstance(config.task.quantization, common.Quantization)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.task.train_data.is_training = None
    # The regex matches the (misspelled) 'inconsistncy' message raised by the
    # Model Garden config validation.
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
config.validate()
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""RetinaNet configuration definition."""
import dataclasses
from typing import Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.vision.beta.configs import retinanet
from official.vision.beta.configs.google import backbones
@dataclasses.dataclass
class RetinaNetTask(retinanet.RetinaNetTask):
quantization: Optional[common.Quantization] = None
@exp_factory.register_config_factory('retinanet_spinenet_mobile_coco_qat')
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
"""Generates a config for COCO OD RetinaNet for mobile with QAT."""
config = retinanet.retinanet_spinenet_mobile_coco()
task = RetinaNetTask.from_args(
quantization=common.Quantization(), **config.task.as_dict())
task.model.backbone = backbones.Backbone(
type='spinenet_mobile',
spinenet_mobile=backbones.SpineNetMobile(
model_id='49',
stochastic_depth_drop_rate=0.2,
min_level=3,
max_level=7,
use_keras_upsampling_2d=True))
config.task = task
return config
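
# A hedged sanity-check sketch (illustrative): the registered experiment should
# carry the SpineNet-Mobile backbone override applied above.
if __name__ == '__main__':
  demo_config = exp_factory.get_exp_config('retinanet_spinenet_mobile_coco_qat')
  assert demo_config.task.model.backbone.type == 'spinenet_mobile'
  assert demo_config.task.model.backbone.spinenet_mobile.model_id == '49'
  assert isinstance(demo_config.task.quantization, common.Quantization)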
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.configs import retinanet as qat_exp_cfg
from official.vision import beta
from official.vision.beta.configs import retinanet as exp_cfg
class RetinaNetConfigTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
('retinanet_spinenet_mobile_coco_qat',),
)
def test_retinanet_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, qat_exp_cfg.RetinaNetTask)
self.assertIsInstance(config.task.model, exp_cfg.RetinaNet)
self.assertIsInstance(config.task.quantization, common.Quantization)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.validate()
config.task.train_data.is_training = None
with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
config.validate()
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""RetinaNet configuration definition."""
import dataclasses
from typing import Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.vision.beta.configs import semantic_segmentation
@dataclasses.dataclass
class SemanticSegmentationTask(semantic_segmentation.SemanticSegmentationTask):
quantization: Optional[common.Quantization] = None
@exp_factory.register_config_factory('mnv2_deeplabv3_pascal_qat')
def mnv2_deeplabv3_pascal() -> cfg.ExperimentConfig:
"""Generates a config for MobileNet v2 + deeplab v3 with QAT."""
config = semantic_segmentation.mnv2_deeplabv3_pascal()
task = SemanticSegmentationTask.from_args(
quantization=common.Quantization(), **config.task.as_dict())
config.task = task
return config
@exp_factory.register_config_factory('mnv2_deeplabv3_cityscapes_qat')
def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
"""Generates a config for MobileNet v2 + deeplab v3 with QAT."""
config = semantic_segmentation.mnv2_deeplabv3_cityscapes()
task = SemanticSegmentationTask.from_args(
quantization=common.Quantization(), **config.task.as_dict())
config.task = task
return config
@exp_factory.register_config_factory('mnv2_deeplabv3plus_cityscapes_qat')
def mnv2_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
"""Generates a config for MobileNet v2 + deeplab v3+ with QAT."""
config = semantic_segmentation.mnv2_deeplabv3plus_cityscapes()
task = SemanticSegmentationTask.from_args(
quantization=common.Quantization(), **config.task.as_dict())
config.task = task
return config
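
# A hedged sketch (illustrative): all three segmentation QAT experiments
# registered above resolve and validate through the factory.
if __name__ == '__main__':
  for name in ('mnv2_deeplabv3_pascal_qat',
               'mnv2_deeplabv3_cityscapes_qat',
               'mnv2_deeplabv3plus_cityscapes_qat'):
    demo_config = exp_factory.get_exp_config(name)
    demo_config.validate()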
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.configs import semantic_segmentation as qat_exp_cfg
from official.vision import beta
from official.vision.beta.configs import semantic_segmentation as exp_cfg
class SemanticSegmentationConfigTest(tf.test.TestCase, parameterized.TestCase):
  @parameterized.parameters(('mnv2_deeplabv3_pascal_qat',),
                            ('mnv2_deeplabv3_cityscapes_qat',),
                            ('mnv2_deeplabv3plus_cityscapes_qat',))
def test_semantic_segmentation_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, qat_exp_cfg.SemanticSegmentationTask)
self.assertIsInstance(config.task.model, exp_cfg.SemanticSegmentationModel)
self.assertIsInstance(config.task.quantization, common.Quantization)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.validate()
config.task.train_data.is_training = None
with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
config.validate()
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Modeling package definition."""
from official.projects.qat.vision.modeling import layers
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
# Import libraries
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.modeling import segmentation_model as qat_segmentation_model
from official.projects.qat.vision.n_bit import schemes as n_bit_schemes
from official.projects.qat.vision.quantization import schemes
from official.vision.beta import configs
from official.vision.beta.modeling import classification_model
from official.vision.beta.modeling import retinanet_model
from official.vision.beta.modeling.decoders import aspp
from official.vision.beta.modeling.heads import segmentation_heads
from official.vision.beta.modeling.layers import nn_layers
def build_qat_classification_model(
model: tf.keras.Model,
quantization: common.Quantization,
input_specs: tf.keras.layers.InputSpec,
model_config: configs.image_classification.ImageClassificationModel,
l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model: # pytype: disable=annotation-type-mismatch # typed-keras
"""Apply model optimization techniques.
Args:
model: The model applying model optimization techniques.
quantization: The Quantization config.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
model_config: The model config.
l2_regularizer: tf.keras.regularizers.Regularizer object. Default to None.
Returns:
model: The model that applied optimization techniques.
"""
original_checkpoint = quantization.pretrained_original_checkpoint
if original_checkpoint:
ckpt = tf.train.Checkpoint(
model=model,
**model.checkpoint_items)
status = ckpt.read(original_checkpoint)
status.expect_partial().assert_existing_objects_matched()
scope_dict = {
'L2': tf.keras.regularizers.l2,
}
with tfmot.quantization.keras.quantize_scope(scope_dict):
annotated_backbone = tfmot.quantization.keras.quantize_annotate_model(
model.backbone)
if quantization.change_num_bits:
backbone = tfmot.quantization.keras.quantize_apply(
annotated_backbone,
scheme=n_bit_schemes.DefaultNBitQuantizeScheme(
num_bits_weight=quantization.num_bits_weight,
num_bits_activation=quantization.num_bits_activation))
else:
backbone = tfmot.quantization.keras.quantize_apply(
annotated_backbone,
scheme=schemes.Default8BitQuantizeScheme())
norm_activation_config = model_config.norm_activation
backbone_optimized_model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=model_config.num_classes,
input_specs=input_specs,
dropout_rate=model_config.dropout_rate,
kernel_regularizer=l2_regularizer,
add_head_batch_norm=model_config.add_head_batch_norm,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon)
for from_layer, to_layer in zip(
model.layers, backbone_optimized_model.layers):
if from_layer != model.backbone:
to_layer.set_weights(from_layer.get_weights())
with tfmot.quantization.keras.quantize_scope(scope_dict):
def apply_quantization_to_dense(layer):
if isinstance(layer, (tf.keras.layers.Dense,
tf.keras.layers.Dropout,
tf.keras.layers.GlobalAveragePooling2D)):
return tfmot.quantization.keras.quantize_annotate_layer(layer)
return layer
annotated_model = tf.keras.models.clone_model(
backbone_optimized_model,
clone_function=apply_quantization_to_dense,
)
if quantization.change_num_bits:
optimized_model = tfmot.quantization.keras.quantize_apply(
annotated_model,
scheme=n_bit_schemes.DefaultNBitQuantizeScheme(
num_bits_weight=quantization.num_bits_weight,
num_bits_activation=quantization.num_bits_activation))
else:
optimized_model = tfmot.quantization.keras.quantize_apply(
annotated_model)
return optimized_model
def build_qat_retinanet(
model: tf.keras.Model, quantization: common.Quantization,
model_config: configs.retinanet.RetinaNet) -> tf.keras.Model:
"""Applies quantization aware training for RetinaNet model.
Args:
model: The model applying quantization aware training.
quantization: The Quantization config.
model_config: The model config.
Returns:
The model that applied optimization techniques.
"""
original_checkpoint = quantization.pretrained_original_checkpoint
if original_checkpoint is not None:
ckpt = tf.train.Checkpoint(
model=model,
**model.checkpoint_items)
status = ckpt.read(original_checkpoint)
status.expect_partial().assert_existing_objects_matched()
scope_dict = {
'L2': tf.keras.regularizers.l2,
}
with tfmot.quantization.keras.quantize_scope(scope_dict):
annotated_backbone = tfmot.quantization.keras.quantize_annotate_model(
model.backbone)
optimized_backbone = tfmot.quantization.keras.quantize_apply(
annotated_backbone,
scheme=schemes.Default8BitQuantizeScheme())
optimized_model = retinanet_model.RetinaNetModel(
optimized_backbone,
model.decoder,
model.head,
model.detection_generator,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_scales=model_config.anchor.num_scales,
aspect_ratios=model_config.anchor.aspect_ratios,
anchor_size=model_config.anchor.anchor_size)
return optimized_model
def build_qat_segmentation_model(
model: tf.keras.Model, quantization: common.Quantization,
input_specs: tf.keras.layers.InputSpec) -> tf.keras.Model:
"""Applies quantization aware training for segmentation model.
Args:
model: The model applying quantization aware training.
quantization: The Quantization config.
input_specs: The shape specifications of input tensor.
Returns:
The model that applied optimization techniques.
"""
original_checkpoint = quantization.pretrained_original_checkpoint
if original_checkpoint is not None:
ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
status = ckpt.read(original_checkpoint)
status.expect_partial().assert_existing_objects_matched()
# Build quantization compatible model.
model = qat_segmentation_model.SegmentationModelQuantized(
model.backbone, model.decoder, model.head, input_specs)
scope_dict = {
'L2': tf.keras.regularizers.l2,
}
# Apply QAT to backbone (a tf.keras.Model) first.
with tfmot.quantization.keras.quantize_scope(scope_dict):
annotated_backbone = tfmot.quantization.keras.quantize_annotate_model(
model.backbone)
optimized_backbone = tfmot.quantization.keras.quantize_apply(
annotated_backbone, scheme=schemes.Default8BitQuantizeScheme())
backbone_optimized_model = qat_segmentation_model.SegmentationModelQuantized(
optimized_backbone, model.decoder, model.head, input_specs)
# Copy over all remaining layers.
for from_layer, to_layer in zip(model.layers,
backbone_optimized_model.layers):
if from_layer != model.backbone:
to_layer.set_weights(from_layer.get_weights())
with tfmot.quantization.keras.quantize_scope(scope_dict):
def apply_quantization_to_layers(layer):
if isinstance(layer, (segmentation_heads.SegmentationHead,
nn_layers.SpatialPyramidPooling, aspp.ASPP)):
return tfmot.quantization.keras.quantize_annotate_layer(layer)
return layer
annotated_model = tf.keras.models.clone_model(
backbone_optimized_model,
clone_function=apply_quantization_to_layers,
)
optimized_model = tfmot.quantization.keras.quantize_apply(
annotated_model, scheme=schemes.Default8BitQuantizeScheme())
return optimized_model
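
# A minimal standalone sketch (illustrative, on a toy model) of the
# annotate-then-apply TFMOT flow used by all three builders above; with no
# explicit scheme, quantize_apply falls back to the default 8-bit scheme.
if __name__ == '__main__':
  toy = tf.keras.Sequential([
      tf.keras.layers.InputLayer([4]),
      tf.keras.layers.Dense(8, activation='relu'),
  ])
  annotated = tfmot.quantization.keras.quantize_annotate_model(toy)
  qat_toy = tfmot.quantization.keras.quantize_apply(annotated)
  qat_toy.summary()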
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for factory.py."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.modeling import factory as qat_factory
from official.vision.beta.configs import backbones
from official.vision.beta.configs import decoders
from official.vision.beta.configs import image_classification as classification_cfg
from official.vision.beta.configs import retinanet as retinanet_cfg
from official.vision.beta.configs import semantic_segmentation as semantic_segmentation_cfg
from official.vision.beta.modeling import factory
class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('resnet', (224, 224), 5e-5),
('resnet', (224, 224), None),
('resnet', (None, None), 5e-5),
('resnet', (None, None), None),
('mobilenet', (224, 224), 5e-5),
('mobilenet', (224, 224), None),
('mobilenet', (None, None), 5e-5),
('mobilenet', (None, None), None),
)
def test_builder(self, backbone_type, input_size, weight_decay):
num_classes = 2
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model_config = classification_cfg.ImageClassificationModel(
num_classes=num_classes,
backbone=backbones.Backbone(type=backbone_type))
l2_regularizer = (
tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
model = factory.build_classification_model(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
quantization_config = common.Quantization()
_ = qat_factory.build_qat_classification_model(
model=model,
input_specs=input_specs,
quantization=quantization_config,
model_config=model_config,
l2_regularizer=l2_regularizer)
class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('spinenet_mobile', (640, 640), False),
)
def test_builder(self, backbone_type, input_size, has_attribute_heads):
num_classes = 2
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
if has_attribute_heads:
attribute_heads_config = [
retinanet_cfg.AttributeHead(name='att1'),
retinanet_cfg.AttributeHead(
name='att2', type='classification', size=2),
]
else:
attribute_heads_config = None
model_config = retinanet_cfg.RetinaNet(
num_classes=num_classes,
backbone=backbones.Backbone(
type=backbone_type,
spinenet_mobile=backbones.SpineNetMobile(
model_id='49',
stochastic_depth_drop_rate=0.2,
min_level=3,
max_level=7,
use_keras_upsampling_2d=True)),
head=retinanet_cfg.RetinaNetHead(
attribute_heads=attribute_heads_config))
l2_regularizer = tf.keras.regularizers.l2(5e-5)
quantization_config = common.Quantization()
model = factory.build_retinanet(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
_ = qat_factory.build_qat_retinanet(
model=model,
quantization=quantization_config,
model_config=model_config)
if has_attribute_heads:
self.assertEqual(model_config.head.attribute_heads[0].as_dict(),
dict(name='att1', type='regression', size=1))
self.assertEqual(model_config.head.attribute_heads[1].as_dict(),
dict(name='att2', type='classification', size=2))
class SegmentationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('mobilenet', (512, 512), 5e-5),)
def test_deeplabv3_builder(self, backbone_type, input_size, weight_decay):
num_classes = 21
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
num_classes=num_classes,
backbone=backbones.Backbone(
type=backbone_type,
mobilenet=backbones.MobileNet(
model_id='MobileNetV2', output_stride=16)),
decoder=decoders.Decoder(
type='aspp',
aspp=decoders.ASPP(
level=4,
num_filters=256,
dilation_rates=[],
spp_layer_version='v1',
output_tensor=True)),
head=semantic_segmentation_cfg.SegmentationHead(
level=4,
low_level=2,
num_convs=1,
upsample_factor=2,
use_depthwise_convolution=True))
l2_regularizer = (
tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
model = factory.build_segmentation_model(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
quantization_config = common.Quantization()
_ = qat_factory.build_qat_segmentation_model(
model=model, quantization=quantization_config, input_specs=input_specs)
@parameterized.parameters(
('mobilenet', (512, 1024), 5e-5),)
def test_deeplabv3plus_builder(self, backbone_type, input_size, weight_decay):
num_classes = 19
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
num_classes=num_classes,
backbone=backbones.Backbone(
type=backbone_type,
mobilenet=backbones.MobileNet(
model_id='MobileNetV2',
output_stride=16,
output_intermediate_endpoints=True)),
decoder=decoders.Decoder(
type='aspp',
aspp=decoders.ASPP(
level=4,
num_filters=256,
dilation_rates=[],
pool_kernel_size=[512, 1024],
use_depthwise_convolution=False,
spp_layer_version='v1',
output_tensor=True)),
head=semantic_segmentation_cfg.SegmentationHead(
level=4,
num_convs=2,
feature_fusion='deeplabv3plus',
use_depthwise_convolution=True,
low_level='2/depthwise',
low_level_num_filters=48,
prediction_kernel_size=1,
upsample_factor=1,
num_filters=256))
l2_regularizer = (
tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
model = factory.build_segmentation_model(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
quantization_config = common.Quantization()
_ = qat_factory.build_qat_segmentation_model(
model=model, quantization=quantization_config, input_specs=input_specs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Layers package definition."""
from official.projects.qat.vision.modeling.layers.nn_blocks import BottleneckBlockQuantized
from official.projects.qat.vision.modeling.layers.nn_blocks import Conv2DBNBlockQuantized
from official.projects.qat.vision.modeling.layers.nn_blocks import InvertedBottleneckBlockQuantized
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains quantized neural blocks for the QAT."""
from typing import Any, Dict, Optional, Sequence, Tuple, Union
# Import libraries
from absl import logging
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from official.modeling import tf_utils
from official.projects.qat.vision.modeling.layers import nn_layers as qat_nn_layers
from official.projects.qat.vision.quantization import configs
from official.vision.beta.modeling.layers import nn_layers
class NoOpActivation:
"""No-op activation which simply returns the incoming tensor.
This activation is required to distinguish between `keras.activations.linear`
which does the same thing. The main difference is that NoOpActivation should
not have any quantize operation applied to it.
"""
def __call__(self, x: tf.Tensor) -> tf.Tensor:
return x
def get_config(self) -> Dict[str, Any]:
"""Get a config of this object."""
return {}
def __eq__(self, other: Any) -> bool:
if not other or not isinstance(other, NoOpActivation):
return False
return True
def __ne__(self, other: Any) -> bool:
return not self.__eq__(other)
def _quantize_wrapped_layer(cls, quantize_config):
  """Returns a constructor that builds `cls` wrapped in QuantizeWrapperV2."""

  def constructor(*arg, **kwargs):
    return tfmot.quantization.keras.QuantizeWrapperV2(
        cls(*arg, **kwargs), quantize_config)

  return constructor
# This class is copied from modeling.layers.nn_blocks.BottleneckBlock and
# applies QAT.
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlockQuantized(tf.keras.layers.Layer):
"""A quantized standard bottleneck block."""
def __init__(self,
filters: int,
strides: int,
dilation_rate: int = 1,
use_projection: bool = False,
se_ratio: Optional[float] = None,
resnetd_shortcut: bool = False,
stochastic_depth_drop_rate: Optional[float] = None,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: tf.keras.regularizers.Regularizer = None,
bias_regularizer: tf.keras.regularizers.Regularizer = None,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
bn_trainable: bool = True, # pytype: disable=annotation-type-mismatch # typed-keras
**kwargs):
"""Initializes a standard bottleneck block with BN after convolutions.
Args:
filters: An `int` number of filters for the first two convolutions. Note
that the third and final convolution will use 4 times as many filters.
strides: An `int` block stride. If greater than 1, this block will
ultimately downsample the input.
dilation_rate: An `int` dilation_rate of convolutions. Default to 1.
use_projection: A `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
resnetd_shortcut: A `bool`. If True, apply the resnetd style modification
to the shortcut connection.
stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
the stochastic depth layer.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
bn_trainable: A `bool` that indicates whether batch norm layers should be
trainable. Default to True.
**kwargs: Additional keyword arguments to be passed.
"""
super(BottleneckBlockQuantized, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._dilation_rate = dilation_rate
self._use_projection = use_projection
self._se_ratio = se_ratio
self._resnetd_shortcut = resnetd_shortcut
self._use_sync_bn = use_sync_bn
self._activation = activation
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = _quantize_wrapped_layer(
tf.keras.layers.experimental.SyncBatchNormalization,
configs.NoOpQuantizeConfig())
self._norm_with_quantize = _quantize_wrapped_layer(
tf.keras.layers.experimental.SyncBatchNormalization,
configs.Default8BitOutputQuantizeConfig())
else:
self._norm = _quantize_wrapped_layer(
tf.keras.layers.BatchNormalization,
configs.NoOpQuantizeConfig())
self._norm_with_quantize = _quantize_wrapped_layer(
tf.keras.layers.BatchNormalization,
configs.Default8BitOutputQuantizeConfig())
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._bn_trainable = bn_trainable
def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
"""Build variables and child layers to prepare for calling."""
conv2d_quantized = _quantize_wrapped_layer(
tf.keras.layers.Conv2D,
configs.Default8BitConvQuantizeConfig(
['kernel'], ['activation'], False))
if self._use_projection:
if self._resnetd_shortcut:
self._shortcut0 = tf.keras.layers.AveragePooling2D(
pool_size=2, strides=self._strides, padding='same')
self._shortcut1 = conv2d_quantized(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
else:
self._shortcut = conv2d_quantized(
filters=self._filters * 4,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm0 = self._norm_with_quantize(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon,
trainable=self._bn_trainable)
self._conv1 = conv2d_quantized(
filters=self._filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon,
trainable=self._bn_trainable)
self._activation1 = tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation(self._activation, use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig())
self._conv2 = conv2d_quantized(
filters=self._filters,
kernel_size=3,
strides=self._strides,
dilation_rate=self._dilation_rate,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon,
trainable=self._bn_trainable)
self._activation2 = tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation(self._activation, use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig())
self._conv3 = conv2d_quantized(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm3 = self._norm_with_quantize(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon,
trainable=self._bn_trainable)
self._activation3 = tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation(self._activation, use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig())
    if self._se_ratio and 0 < self._se_ratio <= 1:
self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
in_filters=self._filters * 4,
out_filters=self._filters * 4,
se_ratio=self._se_ratio,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
else:
self._squeeze_excitation = None
if self._stochastic_depth_drop_rate:
self._stochastic_depth = nn_layers.StochasticDepth(
self._stochastic_depth_drop_rate)
else:
self._stochastic_depth = None
self._add = tfmot.quantization.keras.QuantizeWrapperV2(
tf.keras.layers.Add(),
configs.Default8BitQuantizeConfig([], [], True))
super(BottleneckBlockQuantized, self).build(input_shape)
def get_config(self) -> Dict[str, Any]:
"""Get a config of this layer."""
config = {
'filters': self._filters,
'strides': self._strides,
'dilation_rate': self._dilation_rate,
'use_projection': self._use_projection,
'se_ratio': self._se_ratio,
'resnetd_shortcut': self._resnetd_shortcut,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon,
'bn_trainable': self._bn_trainable
}
base_config = super(BottleneckBlockQuantized, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(
self,
inputs: tf.Tensor,
training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
"""Run the BottleneckBlockQuantized logics."""
shortcut = inputs
if self._use_projection:
if self._resnetd_shortcut:
shortcut = self._shortcut0(shortcut)
shortcut = self._shortcut1(shortcut)
else:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation1(x)
x = self._conv2(x)
x = self._norm2(x)
x = self._activation2(x)
x = self._conv3(x)
x = self._norm3(x)
if self._squeeze_excitation:
x = self._squeeze_excitation(x)
if self._stochastic_depth:
x = self._stochastic_depth(x, training=training)
x = self._add([x, shortcut])
return self._activation3(x)
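
# A hedged smoke-test sketch (illustrative shapes, guarded so it only runs as a
# script): the quantized block is directly callable on a dummy tensor, with
# each conv/norm/activation wrapped in QuantizeWrapperV2 as built above.
if __name__ == '__main__':
  block = BottleneckBlockQuantized(filters=16, strides=1, use_projection=True)
  print(block(tf.ones([2, 32, 32, 64])).shape)  # -> (2, 32, 32, 64)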
# This class is copied from modeling.backbones.mobilenet.Conv2DBNBlock and
# applies QAT.
@tf.keras.utils.register_keras_serializable(package='Vision')
class Conv2DBNBlockQuantized(tf.keras.layers.Layer):
"""A quantized convolution block with batch normalization."""
def __init__(
self,
filters: int,
kernel_size: int = 3,
strides: int = 1,
use_bias: bool = False,
use_explicit_padding: bool = False,
activation: str = 'relu6',
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
use_normalization: bool = True,
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
**kwargs):
"""A convolution block with batch normalization.
Args:
filters: An `int` number of filters for the first two convolutions. Note
that the third and final convolution will use 4 times as many filters.
kernel_size: An `int` specifying the height and width of the 2D
convolution window.
strides: An `int` of block stride. If greater than 1, this block will
ultimately downsample the input.
use_bias: If True, use bias in the convolution layer.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
activation: A `str` name of the activation function.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
use_normalization: If True, use batch normalization.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
super(Conv2DBNBlockQuantized, self).__init__(**kwargs)
self._filters = filters
self._kernel_size = kernel_size
self._strides = strides
self._activation = activation
self._use_bias = use_bias
self._use_explicit_padding = use_explicit_padding
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_normalization = use_normalization
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if use_explicit_padding and kernel_size > 1:
self._padding = 'valid'
else:
self._padding = 'same'
norm_layer = (
tf.keras.layers.experimental.SyncBatchNormalization
if use_sync_bn else tf.keras.layers.BatchNormalization)
self._norm_with_quantize = _quantize_wrapped_layer(
norm_layer, configs.Default8BitOutputQuantizeConfig())
self._norm = _quantize_wrapped_layer(norm_layer,
configs.NoOpQuantizeConfig())
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
def get_config(self) -> Dict[str, Any]:
"""Get a config of this layer."""
config = {
'filters': self._filters,
'strides': self._strides,
'kernel_size': self._kernel_size,
'use_bias': self._use_bias,
'use_explicit_padding': self._use_explicit_padding,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'use_normalization': self._use_normalization,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(Conv2DBNBlockQuantized, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
  def _norm_by_activation(self, activation):
    """Returns the norm constructor: output quantization is skipped for relu
    and relu6 (the quantized activation layer that follows handles it) and
    applied otherwise."""
    if activation in ['relu', 'relu6']:
      return self._norm
    return self._norm_with_quantize
def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
"""Build variables and child layers to prepare for calling."""
if self._use_explicit_padding and self._kernel_size > 1:
padding_size = nn_layers.get_padding_for_kernel_size(self._kernel_size)
self._pad = tf.keras.layers.ZeroPadding2D(padding_size)
conv2d_quantized = _quantize_wrapped_layer(
tf.keras.layers.Conv2D,
configs.Default8BitConvQuantizeConfig(
['kernel'], ['activation'], False))
self._conv0 = conv2d_quantized(
filters=self._filters,
kernel_size=self._kernel_size,
strides=self._strides,
padding=self._padding,
use_bias=self._use_bias,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
if self._use_normalization:
self._norm0 = self._norm_by_activation(self._activation)(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation(self._activation, use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig())
super(Conv2DBNBlockQuantized, self).build(input_shape)
def call(
self,
inputs: tf.Tensor,
training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
"""Run the Conv2DBNBlockQuantized logics."""
if self._use_explicit_padding and self._kernel_size > 1:
inputs = self._pad(inputs)
x = self._conv0(inputs)
if self._use_normalization:
x = self._norm0(x)
return self._activation_layer(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class InvertedBottleneckBlockQuantized(tf.keras.layers.Layer):
"""A quantized inverted bottleneck block."""
def __init__(self,
in_filters,
out_filters,
expand_ratio,
strides,
kernel_size=3,
se_ratio=None,
stochastic_depth_drop_rate=None,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
se_inner_activation='relu',
se_gating_activation='sigmoid',
se_round_down_protect=True,
expand_se_in_filters=False,
depthwise_activation=None,
use_sync_bn=False,
dilation_rate=1,
divisible_by=1,
regularize_depthwise=False,
use_depthwise=True,
use_residual=True,
norm_momentum=0.99,
norm_epsilon=0.001,
output_intermediate_endpoints=False,
**kwargs):
"""Initializes an inverted bottleneck block with BN after convolutions.
Args:
in_filters: An `int` number of filters of the input tensor.
out_filters: An `int` number of filters of the output tensor.
expand_ratio: An `int` of expand_ratio for an inverted bottleneck block.
strides: An `int` block stride. If greater than 1, this block will
ultimately downsample the input.
kernel_size: An `int` kernel_size of the depthwise conv layer.
se_ratio: A `float` or None. If not None, se ratio for the squeeze and
excitation layer.
      stochastic_depth_drop_rate: A `float` or None. If not None, the drop rate
        for the stochastic depth layer.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
activation: A `str` name of the activation function.
se_inner_activation: A `str` name of squeeze-excitation inner activation.
se_gating_activation: A `str` name of squeeze-excitation gating
activation.
      se_round_down_protect: A `bool` of whether rounding down by more than 10%
        is allowed in the SE layer.
      expand_se_in_filters: A `bool` of whether or not to use the expanded
        filter count, rather than `in_filters`, as the input size of the
        squeeze and excitation layer.
depthwise_activation: A `str` name of the activation function for
depthwise only.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      dilation_rate: An `int` that specifies the dilation rate to use for
        dilated convolution; the same value is applied to all spatial
        dimensions.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      regularize_depthwise: A `bool` of whether or not to apply regularization
        on the depthwise kernel.
      use_depthwise: A `bool`. If False, fused convolutions are used instead of
        depthwise convolutions.
      use_residual: A `bool` of whether to include a residual connection
        between the input and output.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
      output_intermediate_endpoints: A `bool` of whether or not to output the
        intermediate endpoints.
**kwargs: Additional keyword arguments to be passed.
"""
super(InvertedBottleneckBlockQuantized, self).__init__(**kwargs)
self._in_filters = in_filters
self._out_filters = out_filters
self._expand_ratio = expand_ratio
self._strides = strides
self._kernel_size = kernel_size
self._se_ratio = se_ratio
self._divisible_by = divisible_by
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
self._dilation_rate = dilation_rate
self._use_sync_bn = use_sync_bn
self._regularize_depthwise = regularize_depthwise
self._use_depthwise = use_depthwise
self._use_residual = use_residual
self._activation = activation
self._se_inner_activation = se_inner_activation
self._se_gating_activation = se_gating_activation
self._se_round_down_protect = se_round_down_protect
self._depthwise_activation = depthwise_activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._expand_se_in_filters = expand_se_in_filters
self._output_intermediate_endpoints = output_intermediate_endpoints
norm_layer = (
tf.keras.layers.experimental.SyncBatchNormalization
if use_sync_bn else tf.keras.layers.BatchNormalization)
self._norm_with_quantize = _quantize_wrapped_layer(
norm_layer, configs.Default8BitOutputQuantizeConfig())
self._norm = _quantize_wrapped_layer(norm_layer,
configs.NoOpQuantizeConfig())
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
if not depthwise_activation:
self._depthwise_activation = activation
    if regularize_depthwise:
      self._depthwise_regularizer = kernel_regularizer
    else:
      self._depthwise_regularizer = None
def _norm_by_activation(self, activation):
if activation in ['relu', 'relu6']:
return self._norm
return self._norm_with_quantize
def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
"""Build variables and child layers to prepare for calling."""
conv2d_quantized = _quantize_wrapped_layer(
tf.keras.layers.Conv2D,
configs.Default8BitConvQuantizeConfig(
['kernel'], ['activation'], False))
depthwise_conv2d_quantized = _quantize_wrapped_layer(
tf.keras.layers.DepthwiseConv2D,
configs.Default8BitConvQuantizeConfig(
['depthwise_kernel'], ['activation'], False))
expand_filters = self._in_filters
if self._expand_ratio > 1:
# First 1x1 conv for channel expansion.
expand_filters = nn_layers.make_divisible(
self._in_filters * self._expand_ratio, self._divisible_by)
expand_kernel = 1 if self._use_depthwise else self._kernel_size
expand_stride = 1 if self._use_depthwise else self._strides
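      # In the fused case (use_depthwise=False), the expansion conv also
      # performs the spatial convolution, taking over the depthwise conv's
      # kernel size and stride.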
self._conv0 = conv2d_quantized(
filters=expand_filters,
kernel_size=expand_kernel,
strides=expand_stride,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm0 = self._norm_by_activation(self._activation)(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation(self._activation, use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig())
if self._use_depthwise:
# Depthwise conv.
self._conv1 = depthwise_conv2d_quantized(
kernel_size=(self._kernel_size, self._kernel_size),
strides=self._strides,
padding='same',
depth_multiplier=1,
dilation_rate=self._dilation_rate,
use_bias=False,
depthwise_initializer=self._kernel_initializer,
          depthwise_regularizer=self._depthwise_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm1 = self._norm_by_activation(self._depthwise_activation)(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._depthwise_activation_layer = (
tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation(self._depthwise_activation,
use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig()))
# Squeeze and excitation.
    if self._se_ratio and 0 < self._se_ratio <= 1:
      logging.info('Using squeeze and excitation.')
in_filters = self._in_filters
if self._expand_se_in_filters:
in_filters = expand_filters
self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
in_filters=in_filters,
out_filters=expand_filters,
se_ratio=self._se_ratio,
divisible_by=self._divisible_by,
round_down_protect=self._se_round_down_protect,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._se_inner_activation,
gating_activation=self._se_gating_activation)
else:
self._squeeze_excitation = None
# Last 1x1 conv.
self._conv2 = conv2d_quantized(
filters=self._out_filters,
kernel_size=1,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=NoOpActivation())
self._norm2 = self._norm_with_quantize(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
if self._stochastic_depth_drop_rate:
self._stochastic_depth = nn_layers.StochasticDepth(
self._stochastic_depth_drop_rate)
else:
self._stochastic_depth = None
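    # Wrap the residual add so that its output is fake-quantized; no weight or
    # activation attributes are quantized, only the layer output.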
self._add = tfmot.quantization.keras.QuantizeWrapperV2(
tf.keras.layers.Add(),
configs.Default8BitQuantizeConfig([], [], True))
super(InvertedBottleneckBlockQuantized, self).build(input_shape)
def get_config(self) -> Dict[str, Any]:
"""Get a config of this layer."""
config = {
'in_filters': self._in_filters,
'out_filters': self._out_filters,
'expand_ratio': self._expand_ratio,
'strides': self._strides,
'kernel_size': self._kernel_size,
'se_ratio': self._se_ratio,
'divisible_by': self._divisible_by,
'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'se_inner_activation': self._se_inner_activation,
'se_gating_activation': self._se_gating_activation,
'se_round_down_protect': self._se_round_down_protect,
'expand_se_in_filters': self._expand_se_in_filters,
'depthwise_activation': self._depthwise_activation,
'dilation_rate': self._dilation_rate,
'use_sync_bn': self._use_sync_bn,
'regularize_depthwise': self._regularize_depthwise,
'use_depthwise': self._use_depthwise,
'use_residual': self._use_residual,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon,
'output_intermediate_endpoints': self._output_intermediate_endpoints
}
base_config = super(InvertedBottleneckBlockQuantized, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(
self,
inputs: tf.Tensor,
training: Optional[Union[bool, tf.Tensor]] = None
) -> Union[tf.Tensor, Tuple[tf.Tensor, Dict[str, tf.Tensor]]]:
"""Run the InvertedBottleneckBlockQuantized logics."""
endpoints = {}
shortcut = inputs
if self._expand_ratio > 1:
x = self._conv0(inputs)
x = self._norm0(x)
x = self._activation_layer(x)
else:
x = inputs
if self._use_depthwise:
x = self._conv1(x)
x = self._norm1(x)
x = self._depthwise_activation_layer(x)
if self._output_intermediate_endpoints:
endpoints['depthwise'] = x
if self._squeeze_excitation:
x = self._squeeze_excitation(x)
x = self._conv2(x)
x = self._norm2(x)
if (self._use_residual and self._in_filters == self._out_filters and
self._strides == 1):
if self._stochastic_depth:
x = self._stochastic_depth(x, training=training)
x = self._add([x, shortcut])
if self._output_intermediate_endpoints:
return x, endpoints
return x
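# Example (illustrative sketch; the argument values below are assumptions, not
# part of this change):
#
#   block = InvertedBottleneckBlockQuantized(
#       in_filters=16, out_filters=16, expand_ratio=6, strides=1,
#       kernel_size=3, se_ratio=0.25, activation='relu6')
#   outputs = block(tf.ones([1, 56, 56, 16]), training=False)
#   # The block expands 16 -> 96 channels (16 * 6), applies a 3x3 depthwise
#   # conv and squeeze-excitation, projects back to 16 channels, and, since
#   # strides == 1 and in_filters == out_filters, adds the quantized residual:
#   # outputs.shape == (1, 56, 56, 16).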