ModelZoo / ResNet50_tensorflow
Commit 3aa48ea8, authored Feb 11, 2022 by Fan Yang; committed by A. Unique TensorFlower on Feb 14, 2022
Parent: f670e89c

Internal change

PiperOrigin-RevId: 428078415
Changes: 53
Showing 20 changed files with 4489 additions and 0 deletions (+4489, -0)
official/projects/qat/vision/configs/retinanet_test.py                 +47   -0
official/projects/qat/vision/configs/semantic_segmentation.py          +58   -0
official/projects/qat/vision/configs/semantic_segmentation_test.py     +47   -0
official/projects/qat/vision/modeling/__init__.py                      +18   -0
official/projects/qat/vision/modeling/factory.py                       +222  -0
official/projects/qat/vision/modeling/factory_test.py                  +197  -0
official/projects/qat/vision/modeling/layers/__init__.py               +20   -0
official/projects/qat/vision/modeling/layers/nn_blocks.py              +778  -0
official/projects/qat/vision/modeling/layers/nn_blocks_test.py         +96   -0
official/projects/qat/vision/modeling/layers/nn_layers.py              +828  -0
official/projects/qat/vision/modeling/layers/nn_layers_test.py         +96   -0
official/projects/qat/vision/modeling/segmentation_model.py            +84   -0
official/projects/qat/vision/n_bit/__init__.py                         +22   -0
official/projects/qat/vision/n_bit/configs.py                          +380  -0
official/projects/qat/vision/n_bit/configs_test.py                     +224  -0
official/projects/qat/vision/n_bit/nn_blocks.py                        +799  -0
official/projects/qat/vision/n_bit/nn_blocks_test.py                   +100  -0
official/projects/qat/vision/n_bit/nn_layers.py                        +215  -0
official/projects/qat/vision/n_bit/schemes.py                          +239  -0
official/projects/qat/vision/quantization/__init__.py                  +19   -0
official/projects/qat/vision/configs/retinanet_test.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.configs import retinanet as qat_exp_cfg
from official.vision import beta
from official.vision.beta.configs import retinanet as exp_cfg
class RetinaNetConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('retinanet_spinenet_mobile_coco_qat',),)
  def test_retinanet_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, qat_exp_cfg.RetinaNetTask)
    self.assertIsInstance(config.task.model, exp_cfg.RetinaNet)
    self.assertIsInstance(config.task.quantization, common.Quantization)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
official/projects/qat/vision/configs/semantic_segmentation.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""RetinaNet configuration definition."""
import dataclasses
from typing import Optional

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.vision.beta.configs import semantic_segmentation
@dataclasses.dataclass
class SemanticSegmentationTask(semantic_segmentation.SemanticSegmentationTask):
  quantization: Optional[common.Quantization] = None


@exp_factory.register_config_factory('mnv2_deeplabv3_pascal_qat')
def mnv2_deeplabv3_pascal() -> cfg.ExperimentConfig:
  """Generates a config for MobileNet v2 + deeplab v3 with QAT."""
  config = semantic_segmentation.mnv2_deeplabv3_pascal()
  task = SemanticSegmentationTask.from_args(
      quantization=common.Quantization(), **config.task.as_dict())
  config.task = task
  return config


@exp_factory.register_config_factory('mnv2_deeplabv3_cityscapes_qat')
def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
  """Generates a config for MobileNet v2 + deeplab v3 with QAT."""
  config = semantic_segmentation.mnv2_deeplabv3_cityscapes()
  task = SemanticSegmentationTask.from_args(
      quantization=common.Quantization(), **config.task.as_dict())
  config.task = task
  return config


@exp_factory.register_config_factory('mnv2_deeplabv3plus_cityscapes_qat')
def mnv2_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
  """Generates a config for MobileNet v2 + deeplab v3+ with QAT."""
  config = semantic_segmentation.mnv2_deeplabv3plus_cityscapes()
  task = SemanticSegmentationTask.from_args(
      quantization=common.Quantization(), **config.task.as_dict())
  config.task = task
  return config
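The factories above register QAT variants of existing segmentation experiments with the Model Garden experiment registry. A minimal sketch of retrieving one of the registered configs; the checkpoint path override is an illustrative assumption, not part of this commit:

from official.core import exp_factory

config = exp_factory.get_exp_config('mnv2_deeplabv3_pascal_qat')
# The QAT task attaches a `quantization` field (see common.Quantization);
# pointing it at a pretrained float checkpoint is an illustrative override.
config.task.quantization.pretrained_original_checkpoint = '/tmp/float_ckpt'  # assumed path
config.validate()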
official/projects/qat/vision/configs/semantic_segmentation_test.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.projects.qat.vision.configs import common
from official.projects.qat.vision.configs import semantic_segmentation as qat_exp_cfg
from official.vision import beta
from official.vision.beta.configs import semantic_segmentation as exp_cfg
class SemanticSegmentationConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(('mnv2_deeplabv3_pascal_qat',),
                            ('mnv2_deeplabv3_cityscapes_qat',),
                            ('mnv2_deeplabv3plus_cityscapes_qat'))
  def test_semantic_segmentation_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, qat_exp_cfg.SemanticSegmentationTask)
    self.assertIsInstance(config.task.model, exp_cfg.SemanticSegmentationModel)
    self.assertIsInstance(config.task.quantization, common.Quantization)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
official/projects/qat/vision/modeling/__init__.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Modeling package definition."""
from official.projects.qat.vision.modeling import layers
official/projects/qat/vision/modeling/factory.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
# Import libraries
import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.projects.qat.vision.configs import common
from official.projects.qat.vision.modeling import segmentation_model as qat_segmentation_model
from official.projects.qat.vision.n_bit import schemes as n_bit_schemes
from official.projects.qat.vision.quantization import schemes
from official.vision.beta import configs
from official.vision.beta.modeling import classification_model
from official.vision.beta.modeling import retinanet_model
from official.vision.beta.modeling.decoders import aspp
from official.vision.beta.modeling.heads import segmentation_heads
from official.vision.beta.modeling.layers import nn_layers
def build_qat_classification_model(
    model: tf.keras.Model,
    quantization: common.Quantization,
    input_specs: tf.keras.layers.InputSpec,
    model_config: configs.image_classification.ImageClassificationModel,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Apply model optimization techniques.

  Args:
    model: The model applying model optimization techniques.
    quantization: The Quantization config.
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
    model_config: The model config.
    l2_regularizer: tf.keras.regularizers.Regularizer object. Default to None.

  Returns:
    model: The model that applied optimization techniques.
  """
  original_checkpoint = quantization.pretrained_original_checkpoint
  if original_checkpoint:
    ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
    status = ckpt.read(original_checkpoint)
    status.expect_partial().assert_existing_objects_matched()

  scope_dict = {
      'L2': tf.keras.regularizers.l2,
  }
  with tfmot.quantization.keras.quantize_scope(scope_dict):
    annotated_backbone = tfmot.quantization.keras.quantize_annotate_model(
        model.backbone)
    if quantization.change_num_bits:
      backbone = tfmot.quantization.keras.quantize_apply(
          annotated_backbone,
          scheme=n_bit_schemes.DefaultNBitQuantizeScheme(
              num_bits_weight=quantization.num_bits_weight,
              num_bits_activation=quantization.num_bits_activation))
    else:
      backbone = tfmot.quantization.keras.quantize_apply(
          annotated_backbone, scheme=schemes.Default8BitQuantizeScheme())

  norm_activation_config = model_config.norm_activation
  backbone_optimized_model = classification_model.ClassificationModel(
      backbone=backbone,
      num_classes=model_config.num_classes,
      input_specs=input_specs,
      dropout_rate=model_config.dropout_rate,
      kernel_regularizer=l2_regularizer,
      add_head_batch_norm=model_config.add_head_batch_norm,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon)

  for from_layer, to_layer in zip(model.layers,
                                  backbone_optimized_model.layers):
    if from_layer != model.backbone:
      to_layer.set_weights(from_layer.get_weights())

  with tfmot.quantization.keras.quantize_scope(scope_dict):

    def apply_quantization_to_dense(layer):
      if isinstance(layer, (tf.keras.layers.Dense,
                            tf.keras.layers.Dropout,
                            tf.keras.layers.GlobalAveragePooling2D)):
        return tfmot.quantization.keras.quantize_annotate_layer(layer)
      return layer

    annotated_model = tf.keras.models.clone_model(
        backbone_optimized_model,
        clone_function=apply_quantization_to_dense,
    )

    if quantization.change_num_bits:
      optimized_model = tfmot.quantization.keras.quantize_apply(
          annotated_model,
          scheme=n_bit_schemes.DefaultNBitQuantizeScheme(
              num_bits_weight=quantization.num_bits_weight,
              num_bits_activation=quantization.num_bits_activation))
    else:
      optimized_model = tfmot.quantization.keras.quantize_apply(
          annotated_model)

  return optimized_model
def build_qat_retinanet(
    model: tf.keras.Model, quantization: common.Quantization,
    model_config: configs.retinanet.RetinaNet) -> tf.keras.Model:
  """Applies quantization aware training for RetinaNet model.

  Args:
    model: The model applying quantization aware training.
    quantization: The Quantization config.
    model_config: The model config.

  Returns:
    The model that applied optimization techniques.
  """
  original_checkpoint = quantization.pretrained_original_checkpoint
  if original_checkpoint is not None:
    ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
    status = ckpt.read(original_checkpoint)
    status.expect_partial().assert_existing_objects_matched()

  scope_dict = {
      'L2': tf.keras.regularizers.l2,
  }
  with tfmot.quantization.keras.quantize_scope(scope_dict):
    annotated_backbone = tfmot.quantization.keras.quantize_annotate_model(
        model.backbone)
    optimized_backbone = tfmot.quantization.keras.quantize_apply(
        annotated_backbone, scheme=schemes.Default8BitQuantizeScheme())
  optimized_model = retinanet_model.RetinaNetModel(
      optimized_backbone,
      model.decoder,
      model.head,
      model.detection_generator,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return optimized_model
def build_qat_segmentation_model(
    model: tf.keras.Model, quantization: common.Quantization,
    input_specs: tf.keras.layers.InputSpec) -> tf.keras.Model:
  """Applies quantization aware training for segmentation model.

  Args:
    model: The model applying quantization aware training.
    quantization: The Quantization config.
    input_specs: The shape specifications of input tensor.

  Returns:
    The model that applied optimization techniques.
  """
  original_checkpoint = quantization.pretrained_original_checkpoint
  if original_checkpoint is not None:
    ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
    status = ckpt.read(original_checkpoint)
    status.expect_partial().assert_existing_objects_matched()

  # Build quantization compatible model.
  model = qat_segmentation_model.SegmentationModelQuantized(
      model.backbone, model.decoder, model.head, input_specs)

  scope_dict = {
      'L2': tf.keras.regularizers.l2,
  }

  # Apply QAT to backbone (a tf.keras.Model) first.
  with tfmot.quantization.keras.quantize_scope(scope_dict):
    annotated_backbone = tfmot.quantization.keras.quantize_annotate_model(
        model.backbone)
    optimized_backbone = tfmot.quantization.keras.quantize_apply(
        annotated_backbone, scheme=schemes.Default8BitQuantizeScheme())

  backbone_optimized_model = qat_segmentation_model.SegmentationModelQuantized(
      optimized_backbone, model.decoder, model.head, input_specs)

  # Copy over all remaining layers.
  for from_layer, to_layer in zip(model.layers,
                                  backbone_optimized_model.layers):
    if from_layer != model.backbone:
      to_layer.set_weights(from_layer.get_weights())

  with tfmot.quantization.keras.quantize_scope(scope_dict):

    def apply_quantization_to_layers(layer):
      if isinstance(layer, (segmentation_heads.SegmentationHead,
                            nn_layers.SpatialPyramidPooling, aspp.ASPP)):
        return tfmot.quantization.keras.quantize_annotate_layer(layer)
      return layer

    annotated_model = tf.keras.models.clone_model(
        backbone_optimized_model,
        clone_function=apply_quantization_to_layers,
    )
    optimized_model = tfmot.quantization.keras.quantize_apply(
        annotated_model, scheme=schemes.Default8BitQuantizeScheme())
  return optimized_model
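These builders expect an already-built float model: the usual flow is to construct the model with the standard official.vision.beta factory and then hand it to the matching build_qat_* function. A minimal sketch mirroring factory_test.py below; the class count and input size are illustrative assumptions:

import tensorflow as tf

from official.projects.qat.vision.configs import common
from official.projects.qat.vision.modeling import factory as qat_factory
from official.vision.beta.configs import backbones
from official.vision.beta.configs import image_classification as classification_cfg
from official.vision.beta.modeling import factory

# Illustrative float model: MobileNet classifier on 224x224 inputs.
input_specs = tf.keras.layers.InputSpec(shape=[None, 224, 224, 3])
model_config = classification_cfg.ImageClassificationModel(
    num_classes=1001, backbone=backbones.Backbone(type='mobilenet'))
float_model = factory.build_classification_model(
    input_specs=input_specs, model_config=model_config, l2_regularizer=None)

# Wrap it for quantization aware training with the builder defined above.
qat_model = qat_factory.build_qat_classification_model(
    model=float_model,
    quantization=common.Quantization(),
    input_specs=input_specs,
    model_config=model_config,
    l2_regularizer=None)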
official/projects/qat/vision/modeling/factory_test.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for factory.py."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.projects.qat.vision.configs import common
from official.projects.qat.vision.modeling import factory as qat_factory
from official.vision.beta.configs import backbones
from official.vision.beta.configs import decoders
from official.vision.beta.configs import image_classification as classification_cfg
from official.vision.beta.configs import retinanet as retinanet_cfg
from official.vision.beta.configs import semantic_segmentation as semantic_segmentation_cfg
from official.vision.beta.modeling import factory
class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      ('resnet', (224, 224), 5e-5),
      ('resnet', (224, 224), None),
      ('resnet', (None, None), 5e-5),
      ('resnet', (None, None), None),
      ('mobilenet', (224, 224), 5e-5),
      ('mobilenet', (224, 224), None),
      ('mobilenet', (None, None), 5e-5),
      ('mobilenet', (None, None), None),
  )
  def test_builder(self, backbone_type, input_size, weight_decay):
    num_classes = 2
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size[0], input_size[1], 3])
    model_config = classification_cfg.ImageClassificationModel(
        num_classes=num_classes,
        backbone=backbones.Backbone(type=backbone_type))
    l2_regularizer = (
        tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
    model = factory.build_classification_model(
        input_specs=input_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)
    quantization_config = common.Quantization()
    _ = qat_factory.build_qat_classification_model(
        model=model,
        input_specs=input_specs,
        quantization=quantization_config,
        model_config=model_config,
        l2_regularizer=l2_regularizer)
class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      ('spinenet_mobile', (640, 640), False),
  )
  def test_builder(self, backbone_type, input_size, has_attribute_heads):
    num_classes = 2
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size[0], input_size[1], 3])
    if has_attribute_heads:
      attribute_heads_config = [
          retinanet_cfg.AttributeHead(name='att1'),
          retinanet_cfg.AttributeHead(
              name='att2', type='classification', size=2),
      ]
    else:
      attribute_heads_config = None
    model_config = retinanet_cfg.RetinaNet(
        num_classes=num_classes,
        backbone=backbones.Backbone(
            type=backbone_type,
            spinenet_mobile=backbones.SpineNetMobile(
                model_id='49',
                stochastic_depth_drop_rate=0.2,
                min_level=3,
                max_level=7,
                use_keras_upsampling_2d=True)),
        head=retinanet_cfg.RetinaNetHead(
            attribute_heads=attribute_heads_config))
    l2_regularizer = tf.keras.regularizers.l2(5e-5)
    quantization_config = common.Quantization()
    model = factory.build_retinanet(
        input_specs=input_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)
    _ = qat_factory.build_qat_retinanet(
        model=model,
        quantization=quantization_config,
        model_config=model_config)
    if has_attribute_heads:
      self.assertEqual(
          model_config.head.attribute_heads[0].as_dict(),
          dict(name='att1', type='regression', size=1))
      self.assertEqual(
          model_config.head.attribute_heads[1].as_dict(),
          dict(name='att2', type='classification', size=2))
class SegmentationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(('mobilenet', (512, 512), 5e-5),)
  def test_deeplabv3_builder(self, backbone_type, input_size, weight_decay):
    num_classes = 21
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size[0], input_size[1], 3])
    model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
        num_classes=num_classes,
        backbone=backbones.Backbone(
            type=backbone_type,
            mobilenet=backbones.MobileNet(
                model_id='MobileNetV2', output_stride=16)),
        decoder=decoders.Decoder(
            type='aspp',
            aspp=decoders.ASPP(
                level=4,
                num_filters=256,
                dilation_rates=[],
                spp_layer_version='v1',
                output_tensor=True)),
        head=semantic_segmentation_cfg.SegmentationHead(
            level=4,
            low_level=2,
            num_convs=1,
            upsample_factor=2,
            use_depthwise_convolution=True))
    l2_regularizer = (
        tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
    model = factory.build_segmentation_model(
        input_specs=input_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)
    quantization_config = common.Quantization()
    _ = qat_factory.build_qat_segmentation_model(
        model=model, quantization=quantization_config,
        input_specs=input_specs)

  @parameterized.parameters(('mobilenet', (512, 1024), 5e-5),)
  def test_deeplabv3plus_builder(self, backbone_type, input_size,
                                 weight_decay):
    num_classes = 19
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size[0], input_size[1], 3])
    model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
        num_classes=num_classes,
        backbone=backbones.Backbone(
            type=backbone_type,
            mobilenet=backbones.MobileNet(
                model_id='MobileNetV2',
                output_stride=16,
                output_intermediate_endpoints=True)),
        decoder=decoders.Decoder(
            type='aspp',
            aspp=decoders.ASPP(
                level=4,
                num_filters=256,
                dilation_rates=[],
                pool_kernel_size=[512, 1024],
                use_depthwise_convolution=False,
                spp_layer_version='v1',
                output_tensor=True)),
        head=semantic_segmentation_cfg.SegmentationHead(
            level=4,
            num_convs=2,
            feature_fusion='deeplabv3plus',
            use_depthwise_convolution=True,
            low_level='2/depthwise',
            low_level_num_filters=48,
            prediction_kernel_size=1,
            upsample_factor=1,
            num_filters=256))
    l2_regularizer = (
        tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
    model = factory.build_segmentation_model(
        input_specs=input_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)
    quantization_config = common.Quantization()
    _ = qat_factory.build_qat_segmentation_model(
        model=model, quantization=quantization_config,
        input_specs=input_specs)


if __name__ == '__main__':
  tf.test.main()
official/projects/qat/vision/modeling/layers/__init__.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Layers package definition."""
from official.projects.qat.vision.modeling.layers.nn_blocks import BottleneckBlockQuantized
from official.projects.qat.vision.modeling.layers.nn_blocks import Conv2DBNBlockQuantized
from official.projects.qat.vision.modeling.layers.nn_blocks import InvertedBottleneckBlockQuantized
official/projects/qat/vision/modeling/layers/nn_blocks.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains quantized neural blocks for the QAT."""
from typing import Any, Dict, Optional, Sequence, Tuple, Union

# Import libraries
from absl import logging
import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.modeling import tf_utils
from official.projects.qat.vision.modeling.layers import nn_layers as qat_nn_layers
from official.projects.qat.vision.quantization import configs
from official.vision.beta.modeling.layers import nn_layers
class NoOpActivation:
  """No-op activation which simply returns the incoming tensor.

  This activation is required to distinguish between `keras.activations.linear`
  which does the same thing. The main difference is that NoOpActivation should
  not have any quantize operation applied to it.
  """

  def __call__(self, x: tf.Tensor) -> tf.Tensor:
    return x

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this object."""
    return {}

  def __eq__(self, other: Any) -> bool:
    if not other or not isinstance(other, NoOpActivation):
      return False
    return True

  def __ne__(self, other: Any) -> bool:
    return not self.__eq__(other)
def _quantize_wrapped_layer(cls, quantize_config):

  def constructor(*arg, **kwargs):
    return tfmot.quantization.keras.QuantizeWrapperV2(
        cls(*arg, **kwargs), quantize_config)

  return constructor

# This class is copied from modeling.layers.nn_blocks.BottleneckBlock and apply
# QAT.
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlockQuantized(tf.keras.layers.Layer):
  """A quantized standard bottleneck block."""
  def __init__(self,
               filters: int,
               strides: int,
               dilation_rate: int = 1,
               use_projection: bool = False,
               se_ratio: Optional[float] = None,
               resnetd_shortcut: bool = False,
               stochastic_depth_drop_rate: Optional[float] = None,
               kernel_initializer: str = 'VarianceScaling',
               kernel_regularizer: tf.keras.regularizers.Regularizer = None,
               bias_regularizer: tf.keras.regularizers.Regularizer = None,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               bn_trainable: bool = True,  # pytype: disable=annotation-type-mismatch  # typed-keras
               **kwargs):
    """Initializes a standard bottleneck block with BN after convolutions.
Args:
filters: An `int` number of filters for the first two convolutions. Note
that the third and final convolution will use 4 times as many filters.
strides: An `int` block stride. If greater than 1, this block will
ultimately downsample the input.
dilation_rate: An `int` dilation_rate of convolutions. Default to 1.
use_projection: A `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
resnetd_shortcut: A `bool`. If True, apply the resnetd style modification
to the shortcut connection.
stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
the stochastic depth layer.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
bn_trainable: A `bool` that indicates whether batch norm layers should be
trainable. Default to True.
**kwargs: Additional keyword arguments to be passed.
"""
    super(BottleneckBlockQuantized, self).__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._dilation_rate = dilation_rate
    self._use_projection = use_projection
    self._se_ratio = se_ratio
    self._resnetd_shortcut = resnetd_shortcut
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    if use_sync_bn:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.NoOpQuantizeConfig())
      self._norm_with_quantize = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.Default8BitOutputQuantizeConfig())
    else:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization, configs.NoOpQuantizeConfig())
      self._norm_with_quantize = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization,
          configs.Default8BitOutputQuantizeConfig())
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._bn_trainable = bn_trainable

  def build(self,
            input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling."""
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    if self._use_projection:
      if self._resnetd_shortcut:
        self._shortcut0 = tf.keras.layers.AveragePooling2D(
            pool_size=2, strides=self._strides, padding='same')
        self._shortcut1 = conv2d_quantized(
            filters=self._filters * 4,
            kernel_size=1,
            strides=1,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
      else:
        self._shortcut = conv2d_quantized(
            filters=self._filters * 4,
            kernel_size=1,
            strides=self._strides,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())

      self._norm0 = self._norm_with_quantize(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon,
          trainable=self._bn_trainable)

    self._conv1 = conv2d_quantized(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation1 = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())

    self._conv2 = conv2d_quantized(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        dilation_rate=self._dilation_rate,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation2 = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())

    self._conv3 = conv2d_quantized(
        filters=self._filters * 4,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm3 = self._norm_with_quantize(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation3 = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())

    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
          in_filters=self._filters * 4,
          out_filters=self._filters * 4,
          se_ratio=self._se_ratio,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
    else:
      self._squeeze_excitation = None

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None
    self._add = tfmot.quantization.keras.QuantizeWrapperV2(
        tf.keras.layers.Add(), configs.Default8BitQuantizeConfig([], [], True))

    super(BottleneckBlockQuantized, self).build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this layer."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'dilation_rate': self._dilation_rate,
        'use_projection': self._use_projection,
        'se_ratio': self._se_ratio,
        'resnetd_shortcut': self._resnetd_shortcut,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'bn_trainable': self._bn_trainable
    }
    base_config = super(BottleneckBlockQuantized, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self,
           inputs: tf.Tensor,
           training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
    """Run the BottleneckBlockQuantized logics."""
    shortcut = inputs
    if self._use_projection:
      if self._resnetd_shortcut:
        shortcut = self._shortcut0(shortcut)
        shortcut = self._shortcut1(shortcut)
      else:
        shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation1(x)

    x = self._conv2(x)
    x = self._norm2(x)
    x = self._activation2(x)

    x = self._conv3(x)
    x = self._norm3(x)

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    x = self._add([x, shortcut])
    return self._activation3(x)

# This class is copied from modeling.backbones.mobilenet.Conv2DBNBlock and apply
# QAT.
@tf.keras.utils.register_keras_serializable(package='Vision')
class Conv2DBNBlockQuantized(tf.keras.layers.Layer):
  """A quantized convolution block with batch normalization."""
  def __init__(
      self,
      filters: int,
      kernel_size: int = 3,
      strides: int = 1,
      use_bias: bool = False,
      use_explicit_padding: bool = False,
      activation: str = 'relu6',
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      use_normalization: bool = True,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      **kwargs):
    """A convolution block with batch normalization.
Args:
filters: An `int` number of filters for the first two convolutions. Note
that the third and final convolution will use 4 times as many filters.
kernel_size: An `int` specifying the height and width of the 2D
convolution window.
strides: An `int` of block stride. If greater than 1, this block will
ultimately downsample the input.
use_bias: If True, use bias in the convolution layer.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
activation: A `str` name of the activation function.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
use_normalization: If True, use batch normalization.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
    super(Conv2DBNBlockQuantized, self).__init__(**kwargs)
    self._filters = filters
    self._kernel_size = kernel_size
    self._strides = strides
    self._activation = activation
    self._use_bias = use_bias
    self._use_explicit_padding = use_explicit_padding
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_normalization = use_normalization
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon

    if use_explicit_padding and kernel_size > 1:
      self._padding = 'valid'
    else:
      self._padding = 'same'

    norm_layer = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    self._norm_with_quantize = _quantize_wrapped_layer(
        norm_layer, configs.Default8BitOutputQuantizeConfig())
    self._norm = _quantize_wrapped_layer(norm_layer,
                                         configs.NoOpQuantizeConfig())

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this layer."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'use_bias': self._use_bias,
        'use_explicit_padding': self._use_explicit_padding,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'use_normalization': self._use_normalization,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(Conv2DBNBlockQuantized, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def _norm_by_activation(self, activation):
    if activation in ['relu', 'relu6']:
      return self._norm
    return self._norm_with_quantize

  def build(self,
            input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling."""
    if self._use_explicit_padding and self._kernel_size > 1:
      padding_size = nn_layers.get_padding_for_kernel_size(self._kernel_size)
      self._pad = tf.keras.layers.ZeroPadding2D(padding_size)
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    self._conv0 = conv2d_quantized(
        filters=self._filters,
        kernel_size=self._kernel_size,
        strides=self._strides,
        padding=self._padding,
        use_bias=self._use_bias,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    if self._use_normalization:
      self._norm0 = self._norm_by_activation(self._activation)(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
    self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())

    super(Conv2DBNBlockQuantized, self).build(input_shape)

  def call(self,
           inputs: tf.Tensor,
           training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
    """Run the Conv2DBNBlockQuantized logics."""
    if self._use_explicit_padding and self._kernel_size > 1:
      inputs = self._pad(inputs)
    x = self._conv0(inputs)
    if self._use_normalization:
      x = self._norm0(x)
    return self._activation_layer(x)

@tf.keras.utils.register_keras_serializable(package='Vision')
class InvertedBottleneckBlockQuantized(tf.keras.layers.Layer):
  """A quantized inverted bottleneck block."""
  def __init__(self,
               in_filters,
               out_filters,
               expand_ratio,
               strides,
               kernel_size=3,
               se_ratio=None,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               se_inner_activation='relu',
               se_gating_activation='sigmoid',
               se_round_down_protect=True,
               expand_se_in_filters=False,
               depthwise_activation=None,
               use_sync_bn=False,
               dilation_rate=1,
               divisible_by=1,
               regularize_depthwise=False,
               use_depthwise=True,
               use_residual=True,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               output_intermediate_endpoints=False,
               **kwargs):
    """Initializes an inverted bottleneck block with BN after convolutions.
Args:
in_filters: An `int` number of filters of the input tensor.
out_filters: An `int` number of filters of the output tensor.
expand_ratio: An `int` of expand_ratio for an inverted bottleneck block.
strides: An `int` block stride. If greater than 1, this block will
ultimately downsample the input.
kernel_size: An `int` kernel_size of the depthwise conv layer.
se_ratio: A `float` or None. If not None, se ratio for the squeeze and
excitation layer.
stochastic_depth_drop_rate: A `float` or None. if not None, drop rate for
the stochastic depth layer.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
activation: A `str` name of the activation function.
se_inner_activation: A `str` name of squeeze-excitation inner activation.
se_gating_activation: A `str` name of squeeze-excitation gating
activation.
se_round_down_protect: A `bool` of whether round down more than 10% will
be allowed in SE layer.
expand_se_in_filters: A `bool` of whether or not to expand in_filter in
squeeze and excitation layer.
depthwise_activation: A `str` name of the activation function for
depthwise only.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
dilation_rate: An `int` that specifies the dilation rate to use for.
divisible_by: An `int` that ensures all inner dimensions are divisible by
this number.
dilated convolution: An `int` to specify the same value for all spatial
dimensions.
regularize_depthwise: A `bool` of whether or not apply regularization on
depthwise.
use_depthwise: A `bool` of whether to uses fused convolutions instead of
depthwise.
use_residual: A `bool` of whether to include residual connection between
input and output.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
output_intermediate_endpoints: A `bool` of whether or not output the
intermediate endpoints.
**kwargs: Additional keyword arguments to be passed.
"""
    super(InvertedBottleneckBlockQuantized, self).__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._expand_ratio = expand_ratio
    self._strides = strides
    self._kernel_size = kernel_size
    self._se_ratio = se_ratio
    self._divisible_by = divisible_by
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._dilation_rate = dilation_rate
    self._use_sync_bn = use_sync_bn
    self._regularize_depthwise = regularize_depthwise
    self._use_depthwise = use_depthwise
    self._use_residual = use_residual
    self._activation = activation
    self._se_inner_activation = se_inner_activation
    self._se_gating_activation = se_gating_activation
    self._se_round_down_protect = se_round_down_protect
    self._depthwise_activation = depthwise_activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._expand_se_in_filters = expand_se_in_filters
    self._output_intermediate_endpoints = output_intermediate_endpoints
    norm_layer = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    self._norm_with_quantize = _quantize_wrapped_layer(
        norm_layer, configs.Default8BitOutputQuantizeConfig())
    self._norm = _quantize_wrapped_layer(norm_layer,
                                         configs.NoOpQuantizeConfig())

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    if not depthwise_activation:
      self._depthwise_activation = activation
    if regularize_depthwise:
      self._depthsize_regularizer = kernel_regularizer
    else:
      self._depthsize_regularizer = None

  def _norm_by_activation(self, activation):
    if activation in ['relu', 'relu6']:
      return self._norm
    return self._norm_with_quantize

  def build(self,
            input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling."""
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    depthwise_conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.DepthwiseConv2D,
        configs.Default8BitConvQuantizeConfig(['depthwise_kernel'],
                                              ['activation'], False))
    expand_filters = self._in_filters
    if self._expand_ratio > 1:
      # First 1x1 conv for channel expansion.
      expand_filters = nn_layers.make_divisible(
          self._in_filters * self._expand_ratio, self._divisible_by)

      expand_kernel = 1 if self._use_depthwise else self._kernel_size
      expand_stride = 1 if self._use_depthwise else self._strides

      self._conv0 = conv2d_quantized(
          filters=expand_filters,
          kernel_size=expand_kernel,
          strides=expand_stride,
          padding='same',
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=NoOpActivation())
      self._norm0 = self._norm_by_activation(self._activation)(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
      self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
          tf_utils.get_activation(self._activation, use_keras_layer=True),
          configs.Default8BitActivationQuantizeConfig())

    if self._use_depthwise:
      # Depthwise conv.
      self._conv1 = depthwise_conv2d_quantized(
          kernel_size=(self._kernel_size, self._kernel_size),
          strides=self._strides,
          padding='same',
          depth_multiplier=1,
          dilation_rate=self._dilation_rate,
          use_bias=False,
          depthwise_initializer=self._kernel_initializer,
          depthwise_regularizer=self._depthsize_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=NoOpActivation())
      self._norm1 = self._norm_by_activation(self._depthwise_activation)(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
      self._depthwise_activation_layer = (
          tfmot.quantization.keras.QuantizeWrapperV2(
              tf_utils.get_activation(self._depthwise_activation,
                                      use_keras_layer=True),
              configs.Default8BitActivationQuantizeConfig()))

    # Squeeze and excitation.
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      logging.info('Use Squeeze and excitation.')
      in_filters = self._in_filters
      if self._expand_se_in_filters:
        in_filters = expand_filters
      self._squeeze_excitation = qat_nn_layers.SqueezeExcitationQuantized(
          in_filters=in_filters,
          out_filters=expand_filters,
          se_ratio=self._se_ratio,
          divisible_by=self._divisible_by,
          round_down_protect=self._se_round_down_protect,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._se_inner_activation,
          gating_activation=self._se_gating_activation)
    else:
      self._squeeze_excitation = None

    # Last 1x1 conv.
    self._conv2 = conv2d_quantized(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm2 = self._norm_with_quantize(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None
    self._add = tfmot.quantization.keras.QuantizeWrapperV2(
        tf.keras.layers.Add(), configs.Default8BitQuantizeConfig([], [], True))

    super(InvertedBottleneckBlockQuantized, self).build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this layer."""
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'expand_ratio': self._expand_ratio,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'se_ratio': self._se_ratio,
        'divisible_by': self._divisible_by,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'se_inner_activation': self._se_inner_activation,
        'se_gating_activation': self._se_gating_activation,
        'se_round_down_protect': self._se_round_down_protect,
        'expand_se_in_filters': self._expand_se_in_filters,
        'depthwise_activation': self._depthwise_activation,
        'dilation_rate': self._dilation_rate,
        'use_sync_bn': self._use_sync_bn,
        'regularize_depthwise': self._regularize_depthwise,
        'use_depthwise': self._use_depthwise,
        'use_residual': self._use_residual,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'output_intermediate_endpoints': self._output_intermediate_endpoints
    }
    base_config = super(InvertedBottleneckBlockQuantized, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(
      self,
      inputs: tf.Tensor,
      training: Optional[Union[bool, tf.Tensor]] = None
  ) -> Union[tf.Tensor, Tuple[tf.Tensor, Dict[str, tf.Tensor]]]:
    """Run the InvertedBottleneckBlockQuantized logics."""
    endpoints = {}
    shortcut = inputs
    if self._expand_ratio > 1:
      x = self._conv0(inputs)
      x = self._norm0(x)
      x = self._activation_layer(x)
    else:
      x = inputs

    if self._use_depthwise:
      x = self._conv1(x)
      x = self._norm1(x)
      x = self._depthwise_activation_layer(x)
      if self._output_intermediate_endpoints:
        endpoints['depthwise'] = x

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    x = self._conv2(x)
    x = self._norm2(x)

    if (self._use_residual and self._in_filters == self._out_filters and
        self._strides == 1):
      if self._stochastic_depth:
        x = self._stochastic_depth(x, training=training)
      x = self._add([x, shortcut])

    if self._output_intermediate_endpoints:
      return x, endpoints
    return x
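Each quantized block above composes plain Keras layers wrapped in tfmot's QuantizeWrapperV2 via _quantize_wrapped_layer, so fake-quantization is inserted around the convolutions, batch norms and activations. A minimal sketch of instantiating one of these blocks on a dummy input, following the pattern in nn_blocks_test.py below; the shapes and filter counts are illustrative assumptions:

import tensorflow as tf

from official.projects.qat.vision.modeling.layers import nn_blocks

# Illustrative input; the block builds QuantizeWrapperV2-wrapped sub-layers
# internally, so fake-quant ops appear in the resulting graph.
inputs = tf.keras.Input(shape=(128, 128, 24), batch_size=1)
block = nn_blocks.InvertedBottleneckBlockQuantized(
    in_filters=24, out_filters=40, expand_ratio=6, strides=1)
outputs = block(inputs)
print(outputs.shape)  # (1, 128, 128, 40)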
official/projects/qat/vision/modeling/layers/nn_blocks_test.py  0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for nn_blocks."""
from typing import Any, Iterable, Tuple

# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.projects.qat.vision.modeling.layers import nn_blocks


def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
  """Returns the combinations of end-to-end tests to run."""
  return combinations.combine(
      distribution=[
          strategy_combinations.default_strategy,
          strategy_combinations.cloud_tpu_strategy,
          strategy_combinations.one_device_strategy_gpu,
      ],
  )
class
NNBlocksTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
nn_blocks
.
BottleneckBlockQuantized
,
1
,
False
,
0.0
,
None
),
(
nn_blocks
.
BottleneckBlockQuantized
,
2
,
True
,
0.2
,
0.25
),
)
def
test_bottleneck_block_creation
(
self
,
block_fn
,
strides
,
use_projection
,
stochastic_depth_drop_rate
,
se_ratio
):
input_size
=
128
filter_size
=
256
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
filter_size
*
4
),
batch_size
=
1
)
block
=
block_fn
(
filter_size
,
strides
,
use_projection
=
use_projection
,
se_ratio
=
se_ratio
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
)
features
=
block
(
inputs
)
self
.
assertAllEqual
(
[
1
,
input_size
//
strides
,
input_size
//
strides
,
filter_size
*
4
],
features
.
shape
.
as_list
())
@
parameterized
.
parameters
(
(
nn_blocks
.
InvertedBottleneckBlockQuantized
,
1
,
1
,
None
,
None
),
(
nn_blocks
.
InvertedBottleneckBlockQuantized
,
6
,
1
,
None
,
None
),
(
nn_blocks
.
InvertedBottleneckBlockQuantized
,
1
,
2
,
None
,
None
),
(
nn_blocks
.
InvertedBottleneckBlockQuantized
,
1
,
1
,
0.2
,
None
),
(
nn_blocks
.
InvertedBottleneckBlockQuantized
,
1
,
1
,
None
,
0.2
),
)
def
test_invertedbottleneck_block_creation
(
self
,
block_fn
,
expand_ratio
,
strides
,
se_ratio
,
stochastic_depth_drop_rate
):
input_size
=
128
in_filters
=
24
out_filters
=
40
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
in_filters
),
batch_size
=
1
)
block
=
block_fn
(
in_filters
=
in_filters
,
out_filters
=
out_filters
,
expand_ratio
=
expand_ratio
,
strides
=
strides
,
se_ratio
=
se_ratio
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
output_intermediate_endpoints
=
False
)
features
=
block
(
inputs
)
self
.
assertAllEqual
(
[
1
,
input_size
//
strides
,
input_size
//
strides
,
out_filters
],
features
.
shape
.
as_list
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/projects/qat/vision/modeling/layers/nn_layers.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for neural networks."""
from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union

import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.modeling import tf_utils
from official.projects.qat.vision.quantization import configs
from official.vision.beta.modeling.decoders import aspp
from official.vision.beta.modeling.layers import nn_layers

# Type annotations.
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]


class NoOpActivation:
  """No-op activation which simply returns the incoming tensor.

  This activation is required to distinguish between `keras.activations.linear`
  which does the same thing. The main difference is that NoOpActivation should
  not have any quantize operation applied to it.
  """

  def __call__(self, x: tf.Tensor) -> tf.Tensor:
    return x

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this object."""
    return {}

  def __eq__(self, other: Any) -> bool:
    return isinstance(other, NoOpActivation)

  def __ne__(self, other: Any) -> bool:
    return not self.__eq__(other)


def _quantize_wrapped_layer(cls, quantize_config):
  def constructor(*arg, **kwargs):
    return tfmot.quantization.keras.QuantizeWrapperV2(
        cls(*arg, **kwargs), quantize_config)
  return constructor
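`_quantize_wrapped_layer` returns a constructor rather than a layer instance, so the wrapped class can be instantiated with the same arguments as the original Keras layer. A small sketch of the pattern (illustrative, not part of the commit):

# Illustrative sketch: the returned constructor builds the layer and wraps it
# in a QuantizeWrapperV2 that carries the given QuantizeConfig.
conv2d_quantized = _quantize_wrapped_layer(
    tf.keras.layers.Conv2D,
    configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'], False))
conv = conv2d_quantized(filters=8, kernel_size=3, padding='same')
assert isinstance(conv, tfmot.quantization.keras.QuantizeWrapperV2)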
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitationQuantized(tf.keras.layers.Layer):
  """Creates a squeeze and excitation layer."""

  def __init__(self,
               in_filters,
               out_filters,
               se_ratio,
               divisible_by=1,
               use_3d_input=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               gating_activation='sigmoid',
               round_down_protect=True,
               **kwargs):
    """Initializes a squeeze and excitation layer.

    Args:
      in_filters: An `int` number of filters of the input tensor.
      out_filters: An `int` number of filters of the output tensor.
      se_ratio: A `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      use_3d_input: A `bool` of whether input is 2D or 3D image.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      gating_activation: A `str` name of the activation function for final
        gating function.
      round_down_protect: A `bool` of whether round down more than 10% will be
        allowed.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._se_ratio = se_ratio
    self._divisible_by = divisible_by
    self._round_down_protect = round_down_protect
    self._use_3d_input = use_3d_input
    self._activation = activation
    self._gating_activation = gating_activation
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    if tf.keras.backend.image_data_format() == 'channels_last':
      if not use_3d_input:
        self._spatial_axis = [1, 2]
      else:
        self._spatial_axis = [1, 2, 3]
    else:
      if not use_3d_input:
        self._spatial_axis = [2, 3]
      else:
        self._spatial_axis = [2, 3, 4]

  def _create_gating_activation_layer(self):
    if self._gating_activation == 'hard_sigmoid':
      # Convert hard_sigmoid activation to quantizable keras layers so each op
      # can be properly quantized.
      # Formula is hard_sigmoid(x) = relu6(x + 3) * 0.16667.
      self._add = tfmot.quantization.keras.QuantizeWrapperV2(
          tf.keras.layers.Add(),
          configs.Default8BitQuantizeConfig([], [], True))
      self._relu6 = tfmot.quantization.keras.QuantizeWrapperV2(
          tf_utils.get_activation('relu6', use_keras_layer=True),
          configs.Default8BitActivationQuantizeConfig())
    else:
      self._gating_activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
          tf_utils.get_activation(self._gating_activation, use_keras_layer=True),
          configs.Default8BitActivationQuantizeConfig())

  def _apply_gating_activation_layer(self, x: tf.Tensor) -> tf.Tensor:
    if self._gating_activation == 'hard_sigmoid':
      x = self._add([x, 3.0 * tf.ones_like(x)])
      x = self._relu6(x)
      x = self._multiply([x, 0.16667 * tf.ones_like(x)])
    else:
      x = self._gating_activation_layer(x)
    return x

  def build(self, input_shape):
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    conv2d_quantized_output_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              True))
    num_reduced_filters = nn_layers.make_divisible(
        max(1, int(self._in_filters * self._se_ratio)),
        divisor=self._divisible_by,
        round_down_protect=self._round_down_protect)

    self._se_reduce = conv2d_quantized(
        filters=num_reduced_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())

    self._se_expand = conv2d_quantized_output_quantized(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())

    self._multiply = tfmot.quantization.keras.QuantizeWrapperV2(
        tf.keras.layers.Multiply(),
        configs.Default8BitQuantizeConfig([], [], True))
    self._reduce_mean_quantizer = (
        tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=8, per_axis=False, symmetric=False, narrow_range=False))
    self._reduce_mean_quantizer_vars = self._reduce_mean_quantizer.build(
        None, 'reduce_mean_quantizer_vars', self)

    self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())
    self._create_gating_activation_layer()

    super().build(input_shape)

  def get_config(self):
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'se_ratio': self._se_ratio,
        'divisible_by': self._divisible_by,
        'use_3d_input': self._use_3d_input,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'gating_activation': self._gating_activation,
        'round_down_protect': self._round_down_protect,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True)
    x = self._reduce_mean_quantizer(
        x, training, self._reduce_mean_quantizer_vars)
    x = self._activation_layer(self._se_reduce(x))
    x = self._apply_gating_activation_layer(self._se_expand(x))
    x = self._multiply([x, inputs])
    return x
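The `hard_sigmoid` gate above is decomposed into Add, ReLU6, and Multiply layers so that each op gets its own fake-quantization node. A quick numeric check of the formula in the comment (plain TF ops, no quantize wrappers; illustrative only):

# hard_sigmoid(x) = relu6(x + 3) * 0.16667, i.e. the MobileNetV3-style
# hard sigmoid relu6(x + 3) / 6 with 1/6 rounded to 0.16667.
x = tf.constant([-4.0, -1.0, 0.0, 1.0, 4.0])
gate = tf.nn.relu6(x + 3.0) * 0.16667
print(gate.numpy())  # approx. [0., 0.333, 0.5, 0.667, 1.]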
@tf.keras.utils.register_keras_serializable(package='Vision')
class SegmentationHeadQuantized(tf.keras.layers.Layer):
  """Creates a segmentation head."""

  def __init__(
      self,
      num_classes: int,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      use_depthwise_convolution: bool = False,
      prediction_kernel_size: int = 1,
      upsample_factor: int = 1,
      feature_fusion: Optional[str] = None,
      decoder_min_level: Optional[int] = None,
      decoder_max_level: Optional[int] = None,
      low_level: int = 2,
      low_level_num_filters: int = 48,
      num_decoder_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a segmentation head.

    Args:
      num_classes: An `int` number of mask classification categories. The
        number of classes does not include background class.
      level: An `int` or `str`, level to use to build segmentation head.
      num_convs: An `int` number of stacked convolution before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      prediction_kernel_size: An `int` number to specify the kernel size of the
        prediction layer.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If
        `deeplabv3plus`, features from decoder_features[level] will be fused
        with low level feature maps from backbone. If `pyramid_fusion`,
        multiscale features will be resized and fused at the target level.
      decoder_min_level: An `int` of minimum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      decoder_max_level: An `int` of maximum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      low_level: An `int` of backbone level to be used for feature fusion. It
        is used when feature_fusion is set to `deeplabv3plus`.
      low_level_num_filters: An `int` of reduced number of filters for the low
        level features before fusing it with higher level features. It is only
        used when feature_fusion is set to `deeplabv3plus`.
      num_decoder_filters: An `int` of number of filters in the decoder
        outputs. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._config_dict = {
        'num_classes': num_classes,
        'level': level,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_depthwise_convolution': use_depthwise_convolution,
        'prediction_kernel_size': prediction_kernel_size,
        'upsample_factor': upsample_factor,
        'feature_fusion': feature_fusion,
        'decoder_min_level': decoder_min_level,
        'decoder_max_level': decoder_max_level,
        'low_level': low_level,
        'low_level_num_filters': low_level_num_filters,
        'num_decoder_filters': num_decoder_filters,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())

  def build(self, input_shape: Sequence[tf.TensorShape]):
    """Creates the variables of the segmentation head."""
    # When input_shape is a list/tuple, the first corresponds to backbone
    # features used for resizing the decoder features (the second) if feature
    # fusion type is `deeplabv3plus`.
    backbone_shape = input_shape[0]
    use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
    random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    conv2d_quantized_output_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              True))
    depthwise_conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.DepthwiseConv2D,
        configs.Default8BitConvQuantizeConfig(['depthwise_kernel'],
                                              ['activation'], False))
    conv_kwargs = {
        'kernel_size': 3 if not use_depthwise_convolution else 1,
        'padding': 'same',
        'use_bias': False,
        'kernel_initializer': random_initializer,
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
    }

    norm_layer = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if self._config_dict['use_sync_bn'] else
        tf.keras.layers.BatchNormalization)
    norm_with_quantize = _quantize_wrapped_layer(
        norm_layer, configs.Default8BitOutputQuantizeConfig())
    norm = norm_with_quantize if self._config_dict['activation'] not in [
        'relu', 'relu6'
    ] else _quantize_wrapped_layer(norm_layer, configs.NoOpQuantizeConfig())
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
      # Deeplabv3+ feature fusion layers.
      self._dlv3p_conv = conv2d_quantized(
          kernel_size=1,
          padding='same',
          use_bias=False,
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
          kernel_regularizer=self._config_dict['kernel_regularizer'],
          name='segmentation_head_deeplabv3p_fusion_conv',
          filters=self._config_dict['low_level_num_filters'],
          activation=NoOpActivation())

      self._dlv3p_norm = norm(
          name='segmentation_head_deeplabv3p_fusion_norm', **bn_kwargs)

    # Segmentation head layers.
    self._convs = []
    self._norms = []
    for i in range(self._config_dict['num_convs']):
      if use_depthwise_convolution:
        self._convs.append(
            depthwise_conv2d_quantized(
                name='segmentation_head_depthwise_conv_{}'.format(i),
                kernel_size=3,
                padding='same',
                use_bias=False,
                depthwise_initializer=random_initializer,
                depthwise_regularizer=self._config_dict['kernel_regularizer'],
                depth_multiplier=1,
                activation=NoOpActivation()))
        norm_name = 'segmentation_head_depthwise_norm_{}'.format(i)
        self._norms.append(norm(name=norm_name, **bn_kwargs))
      conv_name = 'segmentation_head_conv_{}'.format(i)
      self._convs.append(
          conv2d_quantized(
              name=conv_name,
              filters=self._config_dict['num_filters'],
              activation=NoOpActivation(),
              **conv_kwargs))
      norm_name = 'segmentation_head_norm_{}'.format(i)
      self._norms.append(norm(name=norm_name, **bn_kwargs))

    self._classifier = conv2d_quantized_output_quantized(
        name='segmentation_output',
        filters=self._config_dict['num_classes'],
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        activation=NoOpActivation())

    upsampling = _quantize_wrapped_layer(
        tf.keras.layers.UpSampling2D,
        configs.Default8BitQuantizeConfig([], [], True))
    self._upsampling_layer = upsampling(
        size=(self._config_dict['upsample_factor'],
              self._config_dict['upsample_factor']),
        interpolation='nearest')
    self._resizing_layer = tf.keras.layers.Resizing(
        backbone_shape[1], backbone_shape[2], interpolation='bilinear')

    concat = _quantize_wrapped_layer(
        tf.keras.layers.Concatenate,
        configs.Default8BitQuantizeConfig([], [], True))
    self._concat_layer = concat(axis=self._bn_axis)

    super().build(input_shape)

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
    """Forward pass of the segmentation head.

    It supports both a tuple of 2 tensors or 2 dictionaries. The first is
    backbone endpoints, and the second is decoder endpoints. When inputs are
    tensors, they are from a single level of feature maps. When inputs are
    dictionaries, they contain multiple levels of feature maps, where the key
    is the index of feature map.

    Args:
      inputs: A tuple of 2 feature map tensors of shape
        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.
    """
    if self._config_dict['feature_fusion'] in ('pyramid_fusion',
                                               'panoptic_fpn_fusion'):
      raise ValueError(
          'The feature fusion method `pyramid_fusion` is not supported in QAT.')

    backbone_output = inputs[0]
    decoder_output = inputs[1]
    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
      # deeplabv3+ feature fusion.
      x = decoder_output[str(self._config_dict['level'])] if isinstance(
          decoder_output, dict) else decoder_output
      y = backbone_output[str(self._config_dict['low_level'])] if isinstance(
          backbone_output, dict) else backbone_output
      y = self._dlv3p_norm(self._dlv3p_conv(y))
      y = self._activation_layer(y)

      x = self._resizing_layer(x)
      x = tf.cast(x, dtype=y.dtype)
      x = self._concat_layer([x, y])
    else:
      x = decoder_output[str(self._config_dict['level'])] if isinstance(
          decoder_output, dict) else decoder_output

    for conv, norm in zip(self._convs, self._norms):
      x = conv(x)
      x = norm(x)
      x = self._activation_layer(x)
    if self._config_dict['upsample_factor'] > 1:
      # Use keras layer for nearest upsampling so it is QAT compatible.
      x = self._upsampling_layer(x)

    return self._classifier(x)

  def get_config(self):
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config):
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpatialPyramidPoolingQuantized(nn_layers.SpatialPyramidPooling):
  """Implements the quantized Atrous Spatial Pyramid Pooling.

  References:
    [Rethinking Atrous Convolution for Semantic Image Segmentation](
      https://arxiv.org/pdf/1706.05587.pdf)
    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
      Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
  """

  def __init__(
      self,
      output_channels: int,
      dilation_rates: List[int],
      pool_kernel_size: Optional[List[int]] = None,
      use_sync_bn: bool = False,
      batchnorm_momentum: float = 0.99,
      batchnorm_epsilon: float = 0.001,
      activation: str = 'relu',
      dropout: float = 0.5,
      kernel_initializer: str = 'GlorotUniform',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      use_depthwise_convolution: bool = False,
      **kwargs):
    """Initializes `SpatialPyramidPooling`.

    Args:
      output_channels: Number of channels produced by SpatialPyramidPooling.
      dilation_rates: A list of integers for parallel dilated conv.
      pool_kernel_size: A list of integers or None. If None, global average
        pooling is applied, otherwise an average pooling of pool_kernel_size is
        applied.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to
        0.99.
      batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults
        to 0.001.
      activation: A `str` for type of activation to be used. Defaults to
        'relu'.
      dropout: A float for the dropout rate before output. Defaults to 0.5.
      kernel_initializer: Kernel initializer for conv layers. Defaults to
        `glorot_uniform`.
      kernel_regularizer: Kernel regularizer for conv layers. Defaults to None.
      interpolation: The interpolation method for upsampling. Defaults to
        `bilinear`.
      use_depthwise_convolution: Allows spatial pooling to be separable
        depthwise convolutions. [Encoder-Decoder with Atrous Separable
        Convolution for Semantic Image Segmentation](
        https://arxiv.org/pdf/1802.02611.pdf)
      **kwargs: Other keyword arguments for the layer.
    """
    super().__init__(
        output_channels=output_channels,
        dilation_rates=dilation_rates,
        use_sync_bn=use_sync_bn,
        batchnorm_momentum=batchnorm_momentum,
        batchnorm_epsilon=batchnorm_epsilon,
        activation=activation,
        dropout=dropout,
        kernel_initializer=kernel_initializer,
        kernel_regularizer=kernel_regularizer,
        interpolation=interpolation,
        pool_kernel_size=pool_kernel_size,
        use_depthwise_convolution=use_depthwise_convolution)

    self._activation_fn = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(activation, use_keras_layer=True),
        configs.Default8BitActivationQuantizeConfig())
    self._activation_fn_no_quant = (
        tf_utils.get_activation(activation, use_keras_layer=True))

  def build(self, input_shape):
    height = input_shape[1]
    width = input_shape[2]
    channels = input_shape[3]

    norm_layer = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if self._use_sync_bn else tf.keras.layers.BatchNormalization)
    norm_with_quantize = _quantize_wrapped_layer(
        norm_layer, configs.Default8BitOutputQuantizeConfig())
    norm = norm_with_quantize if self._activation not in [
        'relu', 'relu6'
    ] else _quantize_wrapped_layer(norm_layer, configs.NoOpQuantizeConfig())

    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.Default8BitConvQuantizeConfig(['kernel'], ['activation'],
                                              False))
    depthwise_conv2d_quantized_output_quantized = _quantize_wrapped_layer(
        tf.keras.layers.DepthwiseConv2D,
        configs.Default8BitConvQuantizeConfig(['depthwise_kernel'],
                                              ['activation'], True))

    self.aspp_layers = []

    conv1 = conv2d_quantized(
        filters=self._output_channels,
        kernel_size=(1, 1),
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        use_bias=False,
        activation=NoOpActivation())
    norm1 = norm(
        axis=self._bn_axis,
        momentum=self._batchnorm_momentum,
        epsilon=self._batchnorm_epsilon)

    self.aspp_layers.append([conv1, norm1])

    for dilation_rate in self._dilation_rates:
      leading_layers = []
      kernel_size = (3, 3)
      if self._use_depthwise_convolution:
        leading_layers += [
            depthwise_conv2d_quantized_output_quantized(
                depth_multiplier=1,
                kernel_size=kernel_size,
                padding='same',
                depthwise_regularizer=self._kernel_regularizer,
                depthwise_initializer=self._kernel_initializer,
                dilation_rate=dilation_rate,
                use_bias=False,
                activation=NoOpActivation())
        ]
        kernel_size = (1, 1)
      conv_dilation = leading_layers + [
          conv2d_quantized(
              filters=self._output_channels,
              kernel_size=kernel_size,
              padding='same',
              kernel_regularizer=self._kernel_regularizer,
              kernel_initializer=self._kernel_initializer,
              dilation_rate=dilation_rate,
              use_bias=False,
              activation=NoOpActivation())
      ]
      norm_dilation = norm(
          axis=self._bn_axis,
          momentum=self._batchnorm_momentum,
          epsilon=self._batchnorm_epsilon)

      self.aspp_layers.append(conv_dilation + [norm_dilation])

    if self._pool_kernel_size is None:
      pooling = [
          _quantize_wrapped_layer(
              tf.keras.layers.GlobalAveragePooling2D,
              configs.Default8BitQuantizeConfig([], [], True))(),
          _quantize_wrapped_layer(
              tf.keras.layers.Reshape,
              configs.Default8BitQuantizeConfig([], [], True))((1, 1, channels))
      ]
    else:
      pooling = [
          _quantize_wrapped_layer(
              tf.keras.layers.AveragePooling2D,
              configs.Default8BitQuantizeConfig([], [],
                                                True))(self._pool_kernel_size)
      ]

    conv2 = conv2d_quantized(
        filters=self._output_channels,
        kernel_size=(1, 1),
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        use_bias=False,
        activation=NoOpActivation())
    norm2 = norm(
        axis=self._bn_axis,
        momentum=self._batchnorm_momentum,
        epsilon=self._batchnorm_epsilon)

    self.aspp_layers.append(pooling + [conv2, norm2])

    resizing = _quantize_wrapped_layer(
        tf.keras.layers.Resizing,
        configs.Default8BitQuantizeConfig([], [], True))
    self._resizing_layer = resizing(
        height, width, interpolation=self._interpolation)

    self._projection = [
        conv2d_quantized(
            filters=self._output_channels,
            kernel_size=(1, 1),
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            use_bias=False,
            activation=NoOpActivation()),
        norm_with_quantize(
            axis=self._bn_axis,
            momentum=self._batchnorm_momentum,
            epsilon=self._batchnorm_epsilon)
    ]
    self._dropout_layer = tf.keras.layers.Dropout(rate=self._dropout)
    concat = _quantize_wrapped_layer(
        tf.keras.layers.Concatenate,
        configs.Default8BitQuantizeConfig([], [], True))
    self._concat_layer = concat(axis=-1)

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    if training is None:
      training = tf.keras.backend.learning_phase()
    result = []
    for i, layers in enumerate(self.aspp_layers):
      x = inputs
      for layer in layers:
        # Apply layers sequentially.
        x = layer(x, training=training)
      x = self._activation_fn(x)

      # Apply resize layer to the end of the last set of layers.
      if i == len(self.aspp_layers) - 1:
        x = self._resizing_layer(x)

      result.append(tf.cast(x, inputs.dtype))
    x = self._concat_layer(result)
    for layer in self._projection:
      x = layer(x, training=training)
    x = self._activation_fn_no_quant(x)
    return self._dropout_layer(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class ASPPQuantized(aspp.ASPP):
  """Creates a quantized Atrous Spatial Pyramid Pooling (ASPP) layer."""

  def __init__(
      self,
      level: int,
      dilation_rates: List[int],
      num_filters: int = 256,
      pool_kernel_size: Optional[int] = None,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      activation: str = 'relu',
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      use_depthwise_convolution: bool = False,
      spp_layer_version: str = 'v1',
      output_tensor: bool = True,
      **kwargs):
    """Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.

    Args:
      level: An `int` level to apply ASPP.
      dilation_rates: A `list` of dilation rates.
      num_filters: An `int` number of output filters in ASPP.
      pool_kernel_size: A `list` of [height, width] of pooling kernel size or
        None. Pooling size is with respect to original image size, it will be
        scaled down by 2**level. If None, global average pooling is used.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      activation: A `str` activation to be used in ASPP.
      dropout_rate: A `float` rate for dropout regularization.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      interpolation: A `str` of interpolation method. It should be one of
        `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
        `gaussian`, or `mitchellcubic`.
      use_depthwise_convolution: If True depthwise separable convolutions will
        be added to the Atrous spatial pyramid pooling.
      spp_layer_version: A `str` of spatial pyramid pooling layer version.
      output_tensor: Whether to output a single tensor or a dictionary of
        tensor. Default is true.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        pool_kernel_size=pool_kernel_size,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        activation=activation,
        dropout_rate=dropout_rate,
        kernel_initializer=kernel_initializer,
        kernel_regularizer=kernel_regularizer,
        interpolation=interpolation,
        use_depthwise_convolution=use_depthwise_convolution,
        spp_layer_version=spp_layer_version,
        output_tensor=output_tensor,
        **kwargs)

    self._aspp_layer = SpatialPyramidPoolingQuantized

  def call(self,
           inputs: Union[tf.Tensor, Mapping[str, tf.Tensor]]) -> tf.Tensor:
    """Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.

    The output of ASPP will be a dict of {`level`, `tf.Tensor`} even if only
    one level is present, if output_tensor is false. Hence, this will be
    compatible with the rest of the segmentation model interfaces.
    If output_tensor is true, a single tensor is output.

    Args:
      inputs: A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or
        a `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of shape [batch, height_l, width_l,
          filter_size].

    Returns:
      A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or a
        `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of output of ASPP module.
    """
    level = str(self._config_dict['level'])
    backbone_output = inputs[level] if isinstance(inputs, dict) else inputs
    return self.aspp(backbone_output)
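As the docstring notes, `ASPPQuantized` accepts either a single tensor or a dict of multilevel features keyed by level. A small sketch of the dict form (illustrative shapes, not part of the commit):

# Illustrative only: only the configured level is consumed from the dict.
aspp_layer = ASPPQuantized(level=4, dilation_rates=[6, 12, 18], num_filters=128)
features = {'4': tf.random.uniform((2, 8, 8, 64)),
            '5': tf.random.uniform((2, 4, 4, 96))}
output = aspp_layer(features)  # a single tensor of shape (2, 8, 8, 128)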
official/projects/qat/vision/modeling/layers/nn_layers_test.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for nn_layers."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.projects.qat.vision.modeling.layers import nn_layers


class NNLayersTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      ('deeplabv3plus', 1),
      ('deeplabv3plus', 2),
      ('deeplabv3', 1),
      ('deeplabv3', 2),
  )
  def test_segmentation_head_creation(self, feature_fusion, upsample_factor):
    input_size = 128
    decoder_outupt_size = input_size // 2
    decoder_output = tf.random.uniform(
        (2, decoder_outupt_size, decoder_outupt_size, 64), dtype=tf.float32)
    backbone_output = tf.random.uniform((2, input_size, input_size, 32),
                                        dtype=tf.float32)
    segmentation_head = nn_layers.SegmentationHeadQuantized(
        num_classes=5,
        level=4,
        upsample_factor=upsample_factor,
        low_level=2,
        low_level_num_filters=128,
        feature_fusion=feature_fusion)

    features = segmentation_head((backbone_output, decoder_output))

    expected_shape = (
        input_size
        if feature_fusion == 'deeplabv3plus' else decoder_outupt_size)
    self.assertAllEqual([
        2, expected_shape * upsample_factor, expected_shape * upsample_factor,
        5
    ], features.shape.as_list())

  @parameterized.parameters(
      (None, []),
      (None, [6, 12, 18]),
      ([32, 32], [6, 12, 18]),
  )
  def test_spatial_pyramid_pooling_creation(self, pool_kernel_size,
                                            dilation_rates):
    inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32)

    layer = nn_layers.SpatialPyramidPoolingQuantized(
        output_channels=256,
        dilation_rates=dilation_rates,
        pool_kernel_size=pool_kernel_size)
    output = layer(inputs)

    self.assertAllEqual([None, 64, 64, 256], output.shape)

  @parameterized.parameters(
      (3, [6, 12, 18, 24], 128),
      (3, [6, 12, 18], 128),
      (3, [6, 12], 256),
      (4, [], 128),
      (4, [6, 12, 18], 128),
      (4, [], 256),
  )
  def test_aspp_creation(self, level, dilation_rates, num_filters):
    input_size = 128 // 2**level
    tf.keras.backend.set_image_data_format('channels_last')

    endpoints = tf.random.uniform(
        shape=(2, input_size, input_size, 64), dtype=tf.float32)

    network = nn_layers.ASPPQuantized(
        level=level, dilation_rates=dilation_rates, num_filters=num_filters)

    feats = network(endpoints)

    self.assertAllEqual([2, input_size, input_size, num_filters],
                        feats.shape.as_list())


if __name__ == '__main__':
  tf.test.main()
official/projects/qat/vision/modeling/segmentation_model.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build segmentation models."""
from typing import Any, Mapping, Union

# Import libraries
import tensorflow as tf

layers = tf.keras.layers


@tf.keras.utils.register_keras_serializable(package='Vision')
class SegmentationModelQuantized(tf.keras.Model):
  """A Segmentation class model.

  Input images are passed through backbone first. Decoder network is then
  applied, and finally, segmentation head is applied on the output of the
  decoder network. Layers such as ASPP should be part of decoder. Any feature
  fusion is done as part of the segmentation head (i.e. deeplabv3+ feature
  fusion is not part of the decoder, instead it is part of the segmentation
  head). This way, different feature fusion techniques can be combined with
  different backbones, and decoders.
  """

  def __init__(self, backbone: tf.keras.Model, decoder: tf.keras.layers.Layer,
               head: tf.keras.layers.Layer,
               input_specs: tf.keras.layers.InputSpec, **kwargs):
    """Segmentation initialization function.

    Args:
      backbone: a backbone network.
      decoder: a decoder network. E.g. FPN.
      head: segmentation head.
      input_specs: The shape specifications of input tensor.
      **kwargs: keyword arguments to be passed.
    """
    inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)
    backbone_features = backbone(inputs)

    if decoder:
      backbone_feature = backbone_features[str(decoder.get_config()['level'])]
      decoder_feature = decoder(backbone_feature)
    else:
      decoder_feature = backbone_features

    backbone_feature = backbone_features[str(head.get_config()['low_level'])]
    x = {'logits': head((backbone_feature, decoder_feature))}
    super().__init__(inputs=inputs, outputs=x, **kwargs)
    self._config_dict = {
        'backbone': backbone,
        'decoder': decoder,
        'head': head,
    }
    self.backbone = backbone
    self.decoder = decoder
    self.head = head

  @property
  def checkpoint_items(
      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
    """Returns a dictionary of items to be additionally checkpointed."""
    items = dict(backbone=self.backbone, head=self.head)
    if self.decoder is not None:
      items.update(decoder=self.decoder)
    return items

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
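A wiring sketch for how these pieces compose (hypothetical backbone and shapes; `my_backbone` is a stand-in and not part of the commit):

# Hypothetical composition sketch: a backbone that returns per-level features
# (e.g. {'2': ..., '4': ...}), a quantized ASPP decoder, and a segmentation head.
input_specs = tf.keras.layers.InputSpec(shape=[None, 512, 512, 3])
decoder = nn_layers.ASPPQuantized(level=4, dilation_rates=[6, 12, 18])
head = nn_layers.SegmentationHeadQuantized(
    num_classes=21, level=4, low_level=2, feature_fusion='deeplabv3plus')
model = SegmentationModelQuantized(
    backbone=my_backbone,  # assumed: maps an image batch to multilevel features
    decoder=decoder,
    head=head,
    input_specs=input_specs)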
official/projects/qat/vision/n_bit/__init__.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Configs package definition."""
from official.projects.qat.vision.n_bit import configs
from official.projects.qat.vision.n_bit import schemes
from official.projects.qat.vision.n_bit.nn_blocks import BottleneckBlockNBitQuantized
from official.projects.qat.vision.n_bit.nn_blocks import Conv2DBNBlockNBitQuantized
from official.projects.qat.vision.n_bit.nn_blocks import InvertedBottleneckBlockNBitQuantized
official/projects/qat/vision/n_bit/configs.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Default 8-bit QuantizeConfigs."""
from typing import Sequence, Callable, Tuple, Any, Dict

import tensorflow as tf
import tensorflow_model_optimization as tfmot

Quantizer = tfmot.quantization.keras.quantizers.Quantizer
Layer = tf.keras.layers.Layer
Activation = Callable[[tf.Tensor], tf.Tensor]
WeightAndQuantizer = Tuple[tf.Variable, Quantizer]
ActivationAndQuantizer = Tuple[Activation, Quantizer]


class DefaultNBitOutputQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
  """QuantizeConfig which only quantizes the output from a layer."""

  def __init__(self,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def get_weights_and_quantizers(
      self, layer: Layer) -> Sequence[WeightAndQuantizer]:
    return []

  def get_activations_and_quantizers(
      self, layer: Layer) -> Sequence[ActivationAndQuantizer]:
    return []

  def set_quantize_weights(self, layer: Layer,
                           quantize_weights: Sequence[tf.Tensor]):
    pass

  def set_quantize_activations(self, layer: Layer,
                               quantize_activations: Sequence[Activation]):
    pass

  def get_output_quantizers(self, layer: Layer) -> Sequence[Quantizer]:
    return [
        tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=self._num_bits_activation,
            per_axis=False,
            symmetric=False,
            narrow_range=False)  # activation/output
    ]

  def get_config(self) -> Dict[str, Any]:
    return {
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation,
    }


class NoOpQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
  """QuantizeConfig which does not quantize any part of the layer."""

  def __init__(self,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def get_weights_and_quantizers(
      self, layer: Layer) -> Sequence[WeightAndQuantizer]:
    return []

  def get_activations_and_quantizers(
      self, layer: Layer) -> Sequence[ActivationAndQuantizer]:
    return []

  def set_quantize_weights(self, layer: Layer,
                           quantize_weights: Sequence[tf.Tensor]):
    pass

  def set_quantize_activations(self, layer: Layer,
                               quantize_activations: Sequence[Activation]):
    pass

  def get_output_quantizers(self, layer: Layer) -> Sequence[Quantizer]:
    return []

  def get_config(self) -> Dict[str, Any]:
    return {
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation,
    }


class DefaultNBitQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
  """QuantizeConfig for non recurrent Keras layers."""

  def __init__(self,
               weight_attrs: Sequence[str],
               activation_attrs: Sequence[str],
               quantize_output: bool,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    """Initializes a default N-bit quantize config."""
    self.weight_attrs = weight_attrs
    self.activation_attrs = activation_attrs
    self.quantize_output = quantize_output
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

    # TODO(pulkitb): For some layers such as Conv2D, per_axis should be True.
    # Add mapping for which layers support per_axis.
    self.weight_quantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer(
        num_bits=num_bits_weight,
        per_axis=False,
        symmetric=True,
        narrow_range=True)  # weight
    self.activation_quantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
        num_bits=num_bits_activation,
        per_axis=False,
        symmetric=False,
        narrow_range=False)  # activation/output

  def get_weights_and_quantizers(
      self, layer: Layer) -> Sequence[WeightAndQuantizer]:
    """See base class."""
    return [(getattr(layer, weight_attr), self.weight_quantizer)
            for weight_attr in self.weight_attrs]

  def get_activations_and_quantizers(
      self, layer: Layer) -> Sequence[ActivationAndQuantizer]:
    """See base class."""
    return [(getattr(layer, activation_attr), self.activation_quantizer)
            for activation_attr in self.activation_attrs]

  def set_quantize_weights(self, layer: Layer,
                           quantize_weights: Sequence[tf.Tensor]):
    """See base class."""
    if len(self.weight_attrs) != len(quantize_weights):
      raise ValueError(
          '`set_quantize_weights` called on layer {} with {} '
          'weight parameters, but layer expects {} values.'.format(
              layer.name, len(quantize_weights), len(self.weight_attrs)))

    for weight_attr, weight in zip(self.weight_attrs, quantize_weights):
      current_weight = getattr(layer, weight_attr)
      if current_weight.shape != weight.shape:
        raise ValueError('Existing layer weight shape {} is incompatible with'
                         'provided weight shape {}'.format(
                             current_weight.shape, weight.shape))

      setattr(layer, weight_attr, weight)

  def set_quantize_activations(self, layer: Layer,
                               quantize_activations: Sequence[Activation]):
    """See base class."""
    if len(self.activation_attrs) != len(quantize_activations):
      raise ValueError(
          '`set_quantize_activations` called on layer {} with {} '
          'activation parameters, but layer expects {} values.'.format(
              layer.name, len(quantize_activations),
              len(self.activation_attrs)))

    for activation_attr, activation in zip(self.activation_attrs,
                                            quantize_activations):
      setattr(layer, activation_attr, activation)

  def get_output_quantizers(self, layer: Layer) -> Sequence[Quantizer]:
    """See base class."""
    if self.quantize_output:
      return [self.activation_quantizer]
    return []

  @classmethod
  def from_config(cls, config: Dict[str, Any]) -> object:
    """Instantiates a `DefaultNBitQuantizeConfig` from its config.

    Args:
      config: Output of `get_config()`.

    Returns:
      A `DefaultNBitQuantizeConfig` instance.
    """
    return cls(**config)

  def get_config(self) -> Dict[str, Any]:
    """Get a config for this quantize config."""
    # TODO(pulkitb): Add weight and activation quantizer to config.
    # Currently it's created internally, but ideally the quantizers should be
    # part of the constructor and passed in from the registry.
    return {
        'weight_attrs': self.weight_attrs,
        'activation_attrs': self.activation_attrs,
        'quantize_output': self.quantize_output,
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation
    }

  def __eq__(self, other):
    if not isinstance(other, DefaultNBitQuantizeConfig):
      return False

    return (self.weight_attrs == other.weight_attrs and
            self.activation_attrs == other.activation_attrs and
            self.weight_quantizer == other.weight_quantizer and
            self.activation_quantizer == other.activation_quantizer and
            self.quantize_output == other.quantize_output)

  def __ne__(self, other):
    return not self.__eq__(other)


class DefaultNBitConvWeightsQuantizer(
    tfmot.quantization.keras.quantizers.LastValueQuantizer):
  """Quantizer for handling weights in Conv2D/DepthwiseConv2D layers."""

  def __init__(self,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    """Construct LastValueQuantizer with params specific for TFLite Convs."""
    super(DefaultNBitConvWeightsQuantizer, self).__init__(
        num_bits=num_bits_weight,
        per_axis=True,
        symmetric=True,
        narrow_range=True)  # weight
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def build(self,
            tensor_shape: tf.TensorShape,
            name: str,
            layer: Layer):
    """Build min/max quantization variables."""
    min_weight = layer.add_weight(
        name + '_min',
        shape=(tensor_shape[-1],),
        initializer=tf.keras.initializers.Constant(-6.0),
        trainable=False)
    max_weight = layer.add_weight(
        name + '_max',
        shape=(tensor_shape[-1],),
        initializer=tf.keras.initializers.Constant(6.0),
        trainable=False)

    return {'min_var': min_weight, 'max_var': max_weight}


class NoQuantizer(tfmot.quantization.keras.quantizers.Quantizer):
  """Dummy quantizer for explicitly not quantizing."""

  def __call__(self, inputs, training, weights, **kwargs):
    return tf.identity(inputs)

  def get_config(self):
    return {}

  def build(self, tensor_shape, name, layer):
    return {}


class DefaultNBitConvQuantizeConfig(DefaultNBitQuantizeConfig):
  """QuantizeConfig for Conv2D/DepthwiseConv2D layers."""

  def __init__(self,
               weight_attrs: Sequence[str],
               activation_attrs: Sequence[str],
               quantize_output: bool,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    """Initializes default N-bit quantization config for the conv layer."""
    super().__init__(
        weight_attrs=weight_attrs,
        activation_attrs=activation_attrs,
        quantize_output=quantize_output,
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation
    self.weight_quantizer = DefaultNBitConvWeightsQuantizer(
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)


class DefaultNBitActivationQuantizeConfig(
    tfmot.quantization.keras.QuantizeConfig):
  """QuantizeConfig for keras.layers.Activation.

  `keras.layers.Activation` needs a separate `QuantizeConfig` since the
  decision to quantize depends on the specific activation type.
  """

  def __init__(self,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def _assert_activation_layer(self, layer: Layer):
    if not isinstance(layer, tf.keras.layers.Activation):
      raise RuntimeError(
          'DefaultNBitActivationQuantizeConfig can only be used with '
          '`keras.layers.Activation`.')

  def get_weights_and_quantizers(
      self, layer: Layer) -> Sequence[WeightAndQuantizer]:
    """See base class."""
    self._assert_activation_layer(layer)
    return []

  def get_activations_and_quantizers(
      self, layer: Layer) -> Sequence[ActivationAndQuantizer]:
    """See base class."""
    self._assert_activation_layer(layer)
    return []

  def set_quantize_weights(self, layer: Layer,
                           quantize_weights: Sequence[tf.Tensor]):
    """See base class."""
    self._assert_activation_layer(layer)

  def set_quantize_activations(self, layer: Layer,
                               quantize_activations: Sequence[Activation]):
    """See base class."""
    self._assert_activation_layer(layer)

  def get_output_quantizers(self, layer: Layer) -> Sequence[Quantizer]:
    """See base class."""
    self._assert_activation_layer(layer)

    if not hasattr(layer.activation, '__name__'):
      raise ValueError('Activation {} not supported by '
                       'DefaultNBitActivationQuantizeConfig.'.format(
                           layer.activation))

    # This code is copied from TFMOT repo, but added relu6 to support mobilenet.
    if layer.activation.__name__ in ['relu', 'relu6', 'swish']:
      # 'relu' should generally get fused into the previous layer.
      return [tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
          num_bits=self._num_bits_activation,
          per_axis=False,
          symmetric=False,
          narrow_range=False)]  # activation/output
    elif layer.activation.__name__ in ['linear', 'softmax', 'sigmoid']:
      return []

    raise ValueError('Activation {} not supported by '
                     'DefaultNBitActivationQuantizeConfig.'.format(
                         layer.activation))

  def get_config(self) -> Dict[str, Any]:
    """Get a config for this quantizer config."""
    return {
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation,
    }


def _types_dict():
  return {
      'DefaultNBitOutputQuantizeConfig': DefaultNBitOutputQuantizeConfig,
      'NoOpQuantizeConfig': NoOpQuantizeConfig,
      'DefaultNBitQuantizeConfig': DefaultNBitQuantizeConfig,
      'DefaultNBitConvWeightsQuantizer': DefaultNBitConvWeightsQuantizer,
      'DefaultNBitConvQuantizeConfig': DefaultNBitConvQuantizeConfig,
      'DefaultNBitActivationQuantizeConfig': DefaultNBitActivationQuantizeConfig,
  }
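A short sketch of selecting a 4-bit-weight / 8-bit-activation configuration for a conv layer with the classes above (illustrative, not part of the commit):

# The conv variant swaps in the per-channel DefaultNBitConvWeightsQuantizer.
conv_config = DefaultNBitConvQuantizeConfig(
    ['kernel'], ['activation'], False,
    num_bits_weight=4, num_bits_activation=8)
assert isinstance(conv_config.weight_quantizer, DefaultNBitConvWeightsQuantizer)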
official/projects/qat/vision/n_bit/configs_test.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for configs.py."""
# Import libraries
import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.projects.qat.vision.n_bit import configs


class _TestHelper(object):

  def _convert_list(self, list_of_tuples):
    """Transforms a list of 2-tuples to a tuple of 2 lists.

    `QuantizeConfig` methods return a list of 2-tuples in the form
    [(weight1, quantizer1), (weight2, quantizer2)]. This function converts
    it into a 2-tuple of lists: ([weight1, weight2], [quantizer1, quantizer2]).

    Args:
      list_of_tuples: List of 2-tuples.

    Returns:
      2-tuple of lists.
    """
    list1 = []
    list2 = []
    for a, b in list_of_tuples:
      list1.append(a)
      list2.append(b)

    return list1, list2

  # TODO(pulkitb): Consider asserting on full equality for quantizers.

  def _assert_weight_quantizers(self, quantizer_list):
    for quantizer in quantizer_list:
      self.assertIsInstance(
          quantizer,
          tfmot.quantization.keras.quantizers.LastValueQuantizer)

  def _assert_activation_quantizers(self, quantizer_list):
    for quantizer in quantizer_list:
      self.assertIsInstance(
          quantizer,
          tfmot.quantization.keras.quantizers.MovingAverageQuantizer)

  def _assert_kernel_equality(self, a, b):
    self.assertAllEqual(a.numpy(), b.numpy())


class DefaultNBitQuantizeConfigTest(tf.test.TestCase, _TestHelper):

  def _simple_dense_layer(self):
    layer = tf.keras.layers.Dense(2)
    layer.build(input_shape=(3,))
    return layer

  def testGetsQuantizeWeightsAndQuantizers(self):
    layer = self._simple_dense_layer()
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    (weights, weight_quantizers) = self._convert_list(
        quantize_config.get_weights_and_quantizers(layer))

    self._assert_weight_quantizers(weight_quantizers)
    self.assertEqual([layer.kernel], weights)

  def testGetsQuantizeActivationsAndQuantizers(self):
    layer = self._simple_dense_layer()
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    (activations, activation_quantizers) = self._convert_list(
        quantize_config.get_activations_and_quantizers(layer))

    self._assert_activation_quantizers(activation_quantizers)
    self.assertEqual([layer.activation], activations)

  def testSetsQuantizeWeights(self):
    layer = self._simple_dense_layer()
    quantize_kernel = tf.keras.backend.variable(
        np.ones(layer.kernel.shape.as_list()))
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    quantize_config.set_quantize_weights(layer, [quantize_kernel])

    self._assert_kernel_equality(layer.kernel, quantize_kernel)

  def testSetsQuantizeActivations(self):
    layer = self._simple_dense_layer()
    quantize_activation = tf.keras.activations.relu
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    quantize_config.set_quantize_activations(layer, [quantize_activation])

    self.assertEqual(layer.activation, quantize_activation)

  def testSetsQuantizeWeights_ErrorOnWrongNumberOfWeights(self):
    layer = self._simple_dense_layer()
    quantize_kernel = tf.keras.backend.variable(
        np.ones(layer.kernel.shape.as_list()))
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    with self.assertRaises(ValueError):
      quantize_config.set_quantize_weights(layer, [])

    with self.assertRaises(ValueError):
      quantize_config.set_quantize_weights(layer,
                                           [quantize_kernel, quantize_kernel])

  def testSetsQuantizeWeights_ErrorOnWrongShapeOfWeight(self):
    layer = self._simple_dense_layer()
    quantize_kernel = tf.keras.backend.variable(np.ones([1, 2]))
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    with self.assertRaises(ValueError):
      quantize_config.set_quantize_weights(layer, [quantize_kernel])

  def testSetsQuantizeActivations_ErrorOnWrongNumberOfActivations(self):
    layer = self._simple_dense_layer()
    quantize_activation = tf.keras.activations.relu
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    with self.assertRaises(ValueError):
      quantize_config.set_quantize_activations(layer, [])

    with self.assertRaises(ValueError):
      quantize_config.set_quantize_activations(
          layer, [quantize_activation, quantize_activation])

  def testGetsResultQuantizers_ReturnsQuantizer(self):
    layer = self._simple_dense_layer()
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        [], [], True, num_bits_weight, num_bits_activation)

    output_quantizers = quantize_config.get_output_quantizers(layer)

    self.assertLen(output_quantizers, 1)
    self._assert_activation_quantizers(output_quantizers)

  def testGetsResultQuantizers_EmptyWhenFalse(self):
    layer = self._simple_dense_layer()
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        [], [], False, num_bits_weight, num_bits_activation)

    output_quantizers = quantize_config.get_output_quantizers(layer)

    self.assertEqual([], output_quantizers)

  def testSerialization(self):
    num_bits_weight = 4
    num_bits_activation = 4
    quantize_config = configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False, num_bits_weight,
        num_bits_activation)

    expected_config = {
        'class_name': 'DefaultNBitQuantizeConfig',
        'config': {
            'weight_attrs': ['kernel'],
            'activation_attrs': ['activation'],
            'quantize_output': False,
            'num_bits_weight': 4,
            'num_bits_activation'
:
4
}
}
serialized_quantize_config
=
tf
.
keras
.
utils
.
serialize_keras_object
(
quantize_config
)
self
.
assertEqual
(
expected_config
,
serialized_quantize_config
)
quantize_config_from_config
=
tf
.
keras
.
utils
.
deserialize_keras_object
(
serialized_quantize_config
,
module_objects
=
globals
(),
custom_objects
=
configs
.
_types_dict
())
self
.
assertEqual
(
quantize_config
,
quantize_config_from_config
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
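For readers skimming the diff, the pattern these tests exercise is the same one the rest of this commit relies on: a plain Keras layer is wrapped in `QuantizeWrapperV2` together with one of the n-bit configs. A minimal sketch, not part of this commit, with illustrative 4-bit settings and a hypothetical Dense layer:

import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.projects.qat.vision.n_bit import configs

# Wrap a Dense layer so its kernel and activation are fake-quantized to 4 bits.
dense = tf.keras.layers.Dense(10)
quantized_dense = tfmot.quantization.keras.QuantizeWrapperV2(
    dense,
    configs.DefaultNBitQuantizeConfig(
        ['kernel'], ['activation'], False,
        num_bits_weight=4, num_bits_activation=4))

# The wrapper behaves like a regular layer inside a model.
inputs = tf.keras.Input(shape=(8,))
outputs = quantized_dense(inputs)
model = tf.keras.Model(inputs, outputs)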
official/projects/qat/vision/n_bit/nn_blocks.py
0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains quantized neural blocks for the QAT."""

from typing import Any, Dict, Optional, Sequence, Union

# Import libraries
from absl import logging
import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.modeling import tf_utils
from official.projects.qat.vision.n_bit import configs
from official.projects.qat.vision.n_bit import nn_layers as qat_nn_layers
from official.vision.beta.modeling.layers import nn_layers


class NoOpActivation:
  """No-op activation which simply returns the incoming tensor.

  This activation is required to distinguish between `keras.activations.linear`
  which does the same thing. The main difference is that NoOpActivation should
  not have any quantize operation applied to it.
  """

  def __call__(self, x: tf.Tensor) -> tf.Tensor:
    return x

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this object."""
    return {}

  def __eq__(self, other: Any) -> bool:
    if not other or not isinstance(other, NoOpActivation):
      return False
    return True

  def __ne__(self, other: Any) -> bool:
    return not self.__eq__(other)


def _quantize_wrapped_layer(cls, quantize_config):
  def constructor(*arg, **kwargs):
    return tfmot.quantization.keras.QuantizeWrapperV2(
        cls(*arg, **kwargs), quantize_config)
  return constructor
# This class is copied from modeling.layers.nn_blocks.BottleneckBlock and apply
# QAT.
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlockNBitQuantized(tf.keras.layers.Layer):
  """A quantized standard bottleneck block."""

  def __init__(self,
               filters: int,
               strides: int,
               dilation_rate: int = 1,
               use_projection: bool = False,
               se_ratio: Optional[float] = None,
               resnetd_shortcut: bool = False,
               stochastic_depth_drop_rate: Optional[float] = None,
               kernel_initializer: str = 'VarianceScaling',
               kernel_regularizer: tf.keras.regularizers.Regularizer = None,
               bias_regularizer: tf.keras.regularizers.Regularizer = None,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               bn_trainable: bool = True,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8,
               # pytype: disable=annotation-type-mismatch  # typed-keras
               **kwargs):
    """Initializes a standard bottleneck block with BN after convolutions.

    Args:
      filters: An `int` number of filters for the first two convolutions. Note
        that the third and final convolution will use 4 times as many filters.
      strides: An `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      dilation_rate: An `int` dilation_rate of convolutions. Default to 1.
      use_projection: A `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually `True`
        for the first block of a block group, which may change the number of
        filters and the resolution.
      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
      resnetd_shortcut: A `bool`. If True, apply the resnetd style modification
        to the shortcut connection.
      stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
        the stochastic depth layer.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      bn_trainable: A `bool` that indicates whether batch norm layers should be
        trainable. Default to True.
      num_bits_weight: An `int` number of bits for the weight. Default to 8.
      num_bits_activation: An `int` number of bits for the activation. Default
        to 8.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._dilation_rate = dilation_rate
    self._use_projection = use_projection
    self._se_ratio = se_ratio
    self._resnetd_shortcut = resnetd_shortcut
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation
    if use_sync_bn:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.NoOpQuantizeConfig())
      self._norm_with_quantize = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.DefaultNBitOutputQuantizeConfig(
              num_bits_weight=self._num_bits_weight,
              num_bits_activation=self._num_bits_activation))
    else:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization,
          configs.NoOpQuantizeConfig())
      self._norm_with_quantize = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization,
          configs.DefaultNBitOutputQuantizeConfig(
              num_bits_weight=self._num_bits_weight,
              num_bits_activation=self._num_bits_activation))
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._bn_trainable = bn_trainable

  def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling."""
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.DefaultNBitConvQuantizeConfig(
            ['kernel'], ['activation'], False,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    if self._use_projection:
      if self._resnetd_shortcut:
        self._shortcut0 = tf.keras.layers.AveragePooling2D(
            pool_size=2, strides=self._strides, padding='same')
        self._shortcut1 = conv2d_quantized(
            filters=self._filters * 4,
            kernel_size=1,
            strides=1,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())
      else:
        self._shortcut = conv2d_quantized(
            filters=self._filters * 4,
            kernel_size=1,
            strides=self._strides,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activation=NoOpActivation())

      self._norm0 = self._norm_with_quantize(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon,
          trainable=self._bn_trainable)

    self._conv1 = conv2d_quantized(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation1 = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.DefaultNBitActivationQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    self._conv2 = conv2d_quantized(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        dilation_rate=self._dilation_rate,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation2 = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.DefaultNBitActivationQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    self._conv3 = conv2d_quantized(
        filters=self._filters * 4,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm3 = self._norm_with_quantize(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation3 = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.DefaultNBitActivationQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      self._squeeze_excitation = qat_nn_layers.SqueezeExcitationNBitQuantized(
          in_filters=self._filters * 4,
          out_filters=self._filters * 4,
          se_ratio=self._se_ratio,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          num_bits_weight=self._num_bits_weight,
          num_bits_activation=self._num_bits_activation)
    else:
      self._squeeze_excitation = None

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None
    self._add = tfmot.quantization.keras.QuantizeWrapperV2(
        tf.keras.layers.Add(),
        configs.DefaultNBitQuantizeConfig(
            [], [], True,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    super().build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this layer."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'dilation_rate': self._dilation_rate,
        'use_projection': self._use_projection,
        'se_ratio': self._se_ratio,
        'resnetd_shortcut': self._resnetd_shortcut,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'bn_trainable': self._bn_trainable,
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self,
           inputs: tf.Tensor,
           training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
    """Run the BottleneckBlockQuantized logics."""
    shortcut = inputs
    if self._use_projection:
      if self._resnetd_shortcut:
        shortcut = self._shortcut0(shortcut)
        shortcut = self._shortcut1(shortcut)
      else:
        shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation1(x)

    x = self._conv2(x)
    x = self._norm2(x)
    x = self._activation2(x)

    x = self._conv3(x)
    x = self._norm3(x)

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    x = self._add([x, shortcut])
    return self._activation3(x)
# This class is copied from modeling.backbones.mobilenet.Conv2DBNBlock and apply
# QAT.
@tf.keras.utils.register_keras_serializable(package='Vision')
class Conv2DBNBlockNBitQuantized(tf.keras.layers.Layer):
  """A quantized convolution block with batch normalization."""

  def __init__(
      self,
      filters: int,
      kernel_size: int = 3,
      strides: int = 1,
      use_bias: bool = False,
      activation: str = 'relu6',
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      use_normalization: bool = True,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      num_bits_weight: int = 8,
      num_bits_activation: int = 8,
      **kwargs):
    """A convolution block with batch normalization.

    Args:
      filters: An `int` number of filters of the convolution layer.
      kernel_size: An `int` specifying the height and width of the 2D
        convolution window.
      strides: An `int` of block stride. If greater than 1, this block will
        ultimately downsample the input.
      use_bias: If True, use bias in the convolution layer.
      activation: A `str` name of the activation function.
      kernel_initializer: A `str` for kernel initializer of convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      use_normalization: If True, use batch normalization.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      num_bits_weight: An `int` number of bits for the weight. Default to 8.
      num_bits_activation: An `int` number of bits for the activation. Default
        to 8.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._filters = filters
    self._kernel_size = kernel_size
    self._strides = strides
    self._activation = activation
    self._use_bias = use_bias
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_normalization = use_normalization
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

    if use_sync_bn:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.NoOpQuantizeConfig())
    else:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization,
          configs.NoOpQuantizeConfig())
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this layer."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'use_bias': self._use_bias,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'use_normalization': self._use_normalization,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling."""
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.DefaultNBitConvQuantizeConfig(
            ['kernel'], ['activation'], False,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    self._conv0 = conv2d_quantized(
        filters=self._filters,
        kernel_size=self._kernel_size,
        strides=self._strides,
        padding='same',
        use_bias=self._use_bias,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    if self._use_normalization:
      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
    self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(self._activation, use_keras_layer=True),
        configs.DefaultNBitActivationQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    super(Conv2DBNBlockNBitQuantized, self).build(input_shape)

  def call(self,
           inputs: tf.Tensor,
           training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
    """Run the Conv2DBNBlockNBitQuantized logics."""
    x = self._conv0(inputs)
    if self._use_normalization:
      x = self._norm0(x)
    return self._activation_layer(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class InvertedBottleneckBlockNBitQuantized(tf.keras.layers.Layer):
  """A quantized inverted bottleneck block."""

  def __init__(self,
               in_filters,
               out_filters,
               expand_ratio,
               strides,
               kernel_size=3,
               se_ratio=None,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               se_inner_activation='relu',
               se_gating_activation='sigmoid',
               expand_se_in_filters=False,
               depthwise_activation=None,
               use_sync_bn=False,
               dilation_rate=1,
               divisible_by=1,
               regularize_depthwise=False,
               use_depthwise=True,
               use_residual=True,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8,
               **kwargs):
    """Initializes an inverted bottleneck block with BN after convolutions.

    Args:
      in_filters: An `int` number of filters of the input tensor.
      out_filters: An `int` number of filters of the output tensor.
      expand_ratio: An `int` of expand_ratio for an inverted bottleneck block.
      strides: An `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      kernel_size: An `int` kernel_size of the depthwise conv layer.
      se_ratio: A `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
      stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
        the stochastic depth layer.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      se_inner_activation: A `str` name of squeeze-excitation inner activation.
      se_gating_activation: A `str` name of squeeze-excitation gating
        activation.
      expand_se_in_filters: A `bool` of whether or not to expand in_filter in
        squeeze and excitation layer.
      depthwise_activation: A `str` name of the activation function for
        depthwise only.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      dilation_rate: An `int` that specifies the dilation rate to use for
        dilated convolution: the same value is used for all spatial dimensions.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      regularize_depthwise: A `bool` of whether or not apply regularization on
        depthwise.
      use_depthwise: A `bool` of whether to use fused convolutions instead of
        depthwise.
      use_residual: A `bool` of whether to include residual connection between
        input and output.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      num_bits_weight: An `int` number of bits for the weight. Default to 8.
      num_bits_activation: An `int` number of bits for the activation. Default
        to 8.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._expand_ratio = expand_ratio
    self._strides = strides
    self._kernel_size = kernel_size
    self._se_ratio = se_ratio
    self._divisible_by = divisible_by
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._dilation_rate = dilation_rate
    self._use_sync_bn = use_sync_bn
    self._regularize_depthwise = regularize_depthwise
    self._use_depthwise = use_depthwise
    self._use_residual = use_residual
    self._activation = activation
    self._se_inner_activation = se_inner_activation
    self._se_gating_activation = se_gating_activation
    self._depthwise_activation = depthwise_activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._expand_se_in_filters = expand_se_in_filters
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

    if use_sync_bn:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.NoOpQuantizeConfig())
      self._norm_with_quantize = _quantize_wrapped_layer(
          tf.keras.layers.experimental.SyncBatchNormalization,
          configs.DefaultNBitOutputQuantizeConfig(
              num_bits_weight=self._num_bits_weight,
              num_bits_activation=self._num_bits_activation))
    else:
      self._norm = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization,
          configs.NoOpQuantizeConfig())
      self._norm_with_quantize = _quantize_wrapped_layer(
          tf.keras.layers.BatchNormalization,
          configs.DefaultNBitOutputQuantizeConfig(
              num_bits_weight=self._num_bits_weight,
              num_bits_activation=self._num_bits_activation))
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    if not depthwise_activation:
      self._depthwise_activation = activation
    if regularize_depthwise:
      self._depthsize_regularizer = kernel_regularizer
    else:
      self._depthsize_regularizer = None

  def build(self, input_shape: Optional[Union[Sequence[int], tf.Tensor]]):
    """Build variables and child layers to prepare for calling."""
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.DefaultNBitConvQuantizeConfig(
            ['kernel'], ['activation'], False,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    depthwise_conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.DepthwiseConv2D,
        configs.DefaultNBitConvQuantizeConfig(
            ['depthwise_kernel'], ['activation'], False,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    expand_filters = self._in_filters
    if self._expand_ratio > 1:
      # First 1x1 conv for channel expansion.
      expand_filters = nn_layers.make_divisible(
          self._in_filters * self._expand_ratio, self._divisible_by)

      expand_kernel = 1 if self._use_depthwise else self._kernel_size
      expand_stride = 1 if self._use_depthwise else self._strides

      self._conv0 = conv2d_quantized(
          filters=expand_filters,
          kernel_size=expand_kernel,
          strides=expand_stride,
          padding='same',
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=NoOpActivation())
      self._norm0 = self._norm_with_quantize(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
      self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
          tf_utils.get_activation(self._activation, use_keras_layer=True),
          configs.DefaultNBitActivationQuantizeConfig(
              num_bits_weight=self._num_bits_weight,
              num_bits_activation=self._num_bits_activation))

    if self._use_depthwise:
      # Depthwise conv.
      self._conv1 = depthwise_conv2d_quantized(
          kernel_size=(self._kernel_size, self._kernel_size),
          strides=self._strides,
          padding='same',
          depth_multiplier=1,
          dilation_rate=self._dilation_rate,
          use_bias=False,
          depthwise_initializer=self._kernel_initializer,
          depthwise_regularizer=self._depthsize_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=NoOpActivation())
      self._norm1 = self._norm_with_quantize(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
      self._depthwise_activation_layer = (
          tfmot.quantization.keras.QuantizeWrapperV2(
              tf_utils.get_activation(self._depthwise_activation,
                                      use_keras_layer=True),
              configs.DefaultNBitActivationQuantizeConfig(
                  num_bits_weight=self._num_bits_weight,
                  num_bits_activation=self._num_bits_activation)))

    # Squeeze and excitation.
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      logging.info('Use Squeeze and excitation.')
      in_filters = self._in_filters
      if self._expand_se_in_filters:
        in_filters = expand_filters
      self._squeeze_excitation = qat_nn_layers.SqueezeExcitationNBitQuantized(
          in_filters=in_filters,
          out_filters=expand_filters,
          se_ratio=self._se_ratio,
          divisible_by=self._divisible_by,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._se_inner_activation,
          gating_activation=self._se_gating_activation,
          num_bits_weight=self._num_bits_weight,
          num_bits_activation=self._num_bits_activation)
    else:
      self._squeeze_excitation = None

    # Last 1x1 conv.
    self._conv2 = conv2d_quantized(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())
    self._norm2 = self._norm_with_quantize(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None
    self._add = tf.keras.layers.Add()

    super().build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this layer."""
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'expand_ratio': self._expand_ratio,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'se_ratio': self._se_ratio,
        'divisible_by': self._divisible_by,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'se_inner_activation': self._se_inner_activation,
        'se_gating_activation': self._se_gating_activation,
        'expand_se_in_filters': self._expand_se_in_filters,
        'depthwise_activation': self._depthwise_activation,
        'dilation_rate': self._dilation_rate,
        'use_sync_bn': self._use_sync_bn,
        'regularize_depthwise': self._regularize_depthwise,
        'use_depthwise': self._use_depthwise,
        'use_residual': self._use_residual,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self,
           inputs: tf.Tensor,
           training: Optional[Union[bool, tf.Tensor]] = None) -> tf.Tensor:
    """Run the InvertedBottleneckBlockNBitQuantized logics."""
    shortcut = inputs
    if self._expand_ratio > 1:
      x = self._conv0(inputs)
      x = self._norm0(x)
      x = self._activation_layer(x)
    else:
      x = inputs

    if self._use_depthwise:
      x = self._conv1(x)
      x = self._norm1(x)
      x = self._depthwise_activation_layer(x)

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    x = self._conv2(x)
    x = self._norm2(x)

    if (self._use_residual and
        self._in_filters == self._out_filters and
        self._strides == 1):
      if self._stochastic_depth:
        x = self._stochastic_depth(x, training=training)
      x = self._add([x, shortcut])

    return x
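The test file that follows exercises the bottleneck and inverted bottleneck blocks; `Conv2DBNBlockNBitQuantized` is built the same way. A rough usage sketch, not part of this commit, with illustrative shapes and bit widths:

import tensorflow as tf

from official.projects.qat.vision.n_bit import nn_blocks

# A stem-style 3x3 conv + BN + ReLU6 block, fake-quantized to 4 bits.
block = nn_blocks.Conv2DBNBlockNBitQuantized(
    filters=32,
    kernel_size=3,
    strides=2,
    activation='relu6',
    num_bits_weight=4,
    num_bits_activation=4)

inputs = tf.keras.Input(shape=(224, 224, 3), batch_size=1)
features = block(inputs)  # stride 2 with 'same' padding -> (1, 112, 112, 32)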
official/projects/qat/vision/n_bit/nn_blocks_test.py
0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Tests for nn_blocks."""

from typing import Any, Iterable, Tuple

# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.projects.qat.vision.n_bit import nn_blocks


def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
  """Returns the combinations of end-to-end tests to run."""
  return combinations.combine(
      distribution=[
          strategy_combinations.default_strategy,
          strategy_combinations.cloud_tpu_strategy,
          strategy_combinations.one_device_strategy_gpu,
      ],
  )


class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (nn_blocks.BottleneckBlockNBitQuantized, 1, False, 0.0, None, 4, 4),
      (nn_blocks.BottleneckBlockNBitQuantized, 2, True, 0.2, 0.25, 4, 4),
  )
  def test_bottleneck_block_creation(self, block_fn, strides, use_projection,
                                     stochastic_depth_drop_rate, se_ratio,
                                     num_bits_weight, num_bits_activation):
    input_size = 128
    filter_size = 256
    inputs = tf.keras.Input(
        shape=(input_size, input_size, filter_size * 4), batch_size=1)
    block = block_fn(
        filter_size,
        strides,
        use_projection=use_projection,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, filter_size * 4],
        features.shape.as_list())

  @parameterized.parameters(
      (nn_blocks.InvertedBottleneckBlockNBitQuantized, 1, 1, None, None, 4, 4),
      (nn_blocks.InvertedBottleneckBlockNBitQuantized, 6, 1, None, None, 4, 4),
      (nn_blocks.InvertedBottleneckBlockNBitQuantized, 1, 2, None, None, 4, 4),
      (nn_blocks.InvertedBottleneckBlockNBitQuantized, 1, 1, 0.2, None, 4, 4),
      (nn_blocks.InvertedBottleneckBlockNBitQuantized, 1, 1, None, 0.2, 4, 4),
  )
  def test_invertedbottleneck_block_creation(
      self, block_fn, expand_ratio, strides, se_ratio,
      stochastic_depth_drop_rate, num_bits_weight, num_bits_activation):
    input_size = 128
    in_filters = 24
    out_filters = 40
    inputs = tf.keras.Input(
        shape=(input_size, input_size, in_filters), batch_size=1)
    block = block_fn(
        in_filters=in_filters,
        out_filters=out_filters,
        expand_ratio=expand_ratio,
        strides=strides,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, out_filters],
        features.shape.as_list())


if __name__ == '__main__':
  tf.test.main()
official/projects/qat/vision/n_bit/nn_layers.py
0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains common building blocks for neural networks."""

from typing import Any, Callable, Dict, Union

import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.modeling import tf_utils
from official.projects.qat.vision.n_bit import configs
from official.vision.beta.modeling.layers import nn_layers

# Type annotations.
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]


class NoOpActivation:
  """No-op activation which simply returns the incoming tensor.

  This activation is required to distinguish between `keras.activations.linear`
  which does the same thing. The main difference is that NoOpActivation should
  not have any quantize operation applied to it.
  """

  def __call__(self, x: tf.Tensor) -> tf.Tensor:
    return x

  def get_config(self) -> Dict[str, Any]:
    """Get a config of this object."""
    return {}

  def __eq__(self, other: Any) -> bool:
    return isinstance(other, NoOpActivation)

  def __ne__(self, other: Any) -> bool:
    return not self.__eq__(other)


def _quantize_wrapped_layer(cls, quantize_config):
  def constructor(*arg, **kwargs):
    return tfmot.quantization.keras.QuantizeWrapperV2(
        cls(*arg, **kwargs), quantize_config)
  return constructor
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitationNBitQuantized(tf.keras.layers.Layer):
  """Creates a squeeze and excitation layer."""

  def __init__(self,
               in_filters,
               out_filters,
               se_ratio,
               divisible_by=1,
               use_3d_input=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               gating_activation='sigmoid',
               num_bits_weight=8,
               num_bits_activation=8,
               **kwargs):
    """Initializes a squeeze and excitation layer.

    Args:
      in_filters: An `int` number of filters of the input tensor.
      out_filters: An `int` number of filters of the output tensor.
      se_ratio: A `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      use_3d_input: A `bool` of whether input is 2D or 3D image.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      gating_activation: A `str` name of the activation function for final
        gating function.
      num_bits_weight: An `int` number of bits for the weight. Default to 8.
      num_bits_activation: An `int` number of bits for the activation. Default
        to 8.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._se_ratio = se_ratio
    self._divisible_by = divisible_by
    self._use_3d_input = use_3d_input
    self._activation = activation
    self._gating_activation = gating_activation
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation
    if tf.keras.backend.image_data_format() == 'channels_last':
      if not use_3d_input:
        self._spatial_axis = [1, 2]
      else:
        self._spatial_axis = [1, 2, 3]
    else:
      if not use_3d_input:
        self._spatial_axis = [2, 3]
      else:
        self._spatial_axis = [2, 3, 4]
    self._activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(activation, use_keras_layer=True),
        configs.DefaultNBitActivationQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    self._gating_activation_layer = tfmot.quantization.keras.QuantizeWrapperV2(
        tf_utils.get_activation(gating_activation, use_keras_layer=True),
        configs.DefaultNBitActivationQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

  def build(self, input_shape):
    conv2d_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.DefaultNBitConvQuantizeConfig(
            ['kernel'], ['activation'], False,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    conv2d_quantized_output_quantized = _quantize_wrapped_layer(
        tf.keras.layers.Conv2D,
        configs.DefaultNBitConvQuantizeConfig(
            ['kernel'], ['activation'], True,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    num_reduced_filters = nn_layers.make_divisible(
        max(1, int(self._in_filters * self._se_ratio)),
        divisor=self._divisible_by)

    self._se_reduce = conv2d_quantized(
        filters=num_reduced_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())

    self._se_expand = conv2d_quantized_output_quantized(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=NoOpActivation())

    self._multiply = tfmot.quantization.keras.QuantizeWrapperV2(
        tf.keras.layers.Multiply(),
        configs.DefaultNBitQuantizeConfig(
            [], [], True,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    self._reduce_mean_quantizer = (
        tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=self._num_bits_activation,
            per_axis=False,
            symmetric=False,
            narrow_range=False))  # activation/output
    self._reduce_mean_quantizer_vars = self._reduce_mean_quantizer.build(
        None, 'reduce_mean_quantizer_vars', self)

    super().build(input_shape)

  def get_config(self):
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'se_ratio': self._se_ratio,
        'divisible_by': self._divisible_by,
        'use_3d_input': self._use_3d_input,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'gating_activation': self._gating_activation,
        'num_bits_weight': self._num_bits_weight,
        'num_bits_activation': self._num_bits_activation
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True)
    x = self._reduce_mean_quantizer(
        x, training, self._reduce_mean_quantizer_vars)
    x = self._activation_layer(self._se_reduce(x))
    x = self._gating_activation_layer(self._se_expand(x))
    x = self._multiply([x, inputs])
    return x
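A quick sketch of how the squeeze-and-excitation layer above can be used on its own (illustrative shapes and bit widths, not part of this commit; the blocks in nn_blocks.py construct it the same way):

import tensorflow as tf

from official.projects.qat.vision.n_bit import nn_layers as qat_nn_layers

# Recalibrate a 64-channel feature map with a 4-bit quantized SE layer.
se = qat_nn_layers.SqueezeExcitationNBitQuantized(
    in_filters=64,
    out_filters=64,
    se_ratio=0.25,
    num_bits_weight=4,
    num_bits_activation=4)

features = tf.keras.Input(shape=(56, 56, 64), batch_size=1)
recalibrated = se(features)  # same shape as the input: (1, 56, 56, 64)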
official/projects/qat/vision/n_bit/schemes.py
0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Quantization schemes."""

from typing import Type
# Import libraries
import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.projects.qat.vision.n_bit import configs
from official.projects.qat.vision.n_bit import nn_blocks

keras = tf.keras
default_n_bit_transforms = (
    tfmot.quantization.keras.experimental.default_n_bit.default_n_bit_transforms)

_LayerNode = (
    tfmot.quantization.keras.graph_transformations.transforms.LayerNode)
_LayerPattern = (
    tfmot.quantization.keras.graph_transformations.transforms.LayerPattern)
_ModelTransformer = (
    tfmot.quantization.keras.graph_transformations.model_transformer
    .ModelTransformer)

_QUANTIZATION_WEIGHT_NAMES = [
    'output_max', 'output_min', 'optimizer_step',
    'kernel_min', 'kernel_max',
    'depthwise_kernel_min', 'depthwise_kernel_max',
    'reduce_mean_quantizer_vars_min', 'reduce_mean_quantizer_vars_max'
]

_ORIGINAL_WEIGHT_NAME = [
    'kernel', 'depthwise_kernel',
    'gamma', 'beta', 'moving_mean', 'moving_variance', 'bias'
]
class CustomLayerQuantize(
    tfmot.quantization.keras.graph_transformations.transforms.Transform):
  """Add QAT support for Keras Custom layer."""

  def __init__(self,
               original_layer_pattern: str,
               quantized_layer_class: Type[keras.layers.Layer],
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    super().__init__()
    self._original_layer_pattern = original_layer_pattern
    self._quantized_layer_class = quantized_layer_class
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self) -> _LayerPattern:
    """See base class."""
    return _LayerPattern(self._original_layer_pattern)

  def _is_quantization_weight_name(self, name):
    simple_name = name.split('/')[-1].split(':')[0]
    if simple_name in _QUANTIZATION_WEIGHT_NAMES:
      return True
    if simple_name in _ORIGINAL_WEIGHT_NAME:
      return False
    raise ValueError(f'Variable name {simple_name} is not supported on '
                     f'CustomLayerQuantize({self._original_layer_pattern}) '
                     'transform.')

  def replacement(self, match_layer: _LayerNode) -> _LayerNode:
    """See base class."""
    bottleneck_layer = match_layer.layer
    bottleneck_config = bottleneck_layer['config']
    bottleneck_config['num_bits_weight'] = self._num_bits_weight
    bottleneck_config['num_bits_activation'] = self._num_bits_activation
    bottleneck_names_and_weights = list(match_layer.names_and_weights)
    quantized_layer = self._quantized_layer_class(**bottleneck_config)
    dummy_input_shape = [1, 1, 1, 1]
    quantized_layer.compute_output_shape(dummy_input_shape)
    quantized_names_and_weights = zip(
        [weight.name for weight in quantized_layer.weights],
        quantized_layer.get_weights())
    match_idx = 0
    names_and_weights = []
    for name_and_weight in quantized_names_and_weights:
      if not self._is_quantization_weight_name(name=name_and_weight[0]):
        name_and_weight = bottleneck_names_and_weights[match_idx]
        match_idx = match_idx + 1
      names_and_weights.append(name_and_weight)

    if match_idx != len(bottleneck_names_and_weights):
      raise ValueError('{}/{} of Bottleneck weights is transformed.'.format(
          match_idx, len(bottleneck_names_and_weights)))
    quantized_layer_config = keras.layers.serialize(quantized_layer)
    quantized_layer_config['name'] = quantized_layer_config['config']['name']
    layer_metadata = {
        'quantize_config':
            configs.DefaultNBitOutputQuantizeConfig(
                num_bits_weight=self._num_bits_weight,
                num_bits_activation=self._num_bits_activation)}

    return _LayerNode(
        quantized_layer_config,
        metadata=layer_metadata,
        names_and_weights=names_and_weights)
class QuantizeLayoutTransform(
    tfmot.quantization.keras.QuantizeLayoutTransform):
  """Default model transformations."""

  def __init__(self,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def apply(self, model, layer_quantize_map):
    """Implement default 8-bit transforms.

    Currently this means the following.
      1. Pull activations into layers, and apply fuse activations. (TODO)
      2. Modify range in incoming layers for Concat. (TODO)
      3. Fuse Conv2D/DepthwiseConv2D + BN into single layer.

    Args:
      model: Keras model to be quantized.
      layer_quantize_map: Map with keys as layer names, and values as dicts
        containing custom `QuantizeConfig`s which may have been passed with
        layers.

    Returns:
      (Transformed Keras model to better match TensorFlow Lite backend, updated
      layer quantize map.)
    """
    transforms = [
        default_n_bit_transforms.InputLayerQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.SeparableConv1DQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.SeparableConvQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.Conv2DReshapeBatchNormReLUQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.Conv2DReshapeBatchNormActivationQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.Conv2DBatchNormReLUQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.Conv2DBatchNormActivationQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.Conv2DReshapeBatchNormQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.Conv2DBatchNormQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.ConcatTransform6Inputs(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.ConcatTransform5Inputs(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.ConcatTransform4Inputs(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.ConcatTransform3Inputs(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.ConcatTransform(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.LayerReLUQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        default_n_bit_transforms.LayerReluActivationQuantize(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        CustomLayerQuantize(
            'Vision>BottleneckBlock',
            nn_blocks.BottleneckBlockNBitQuantized,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        CustomLayerQuantize(
            'Vision>InvertedBottleneckBlock',
            nn_blocks.InvertedBottleneckBlockNBitQuantized,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        CustomLayerQuantize(
            'Vision>Conv2DBNBlock',
            nn_blocks.Conv2DBNBlockNBitQuantized,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        # TODO(yeqing): Remove the `Beta` custom layers.
        CustomLayerQuantize(
            'Beta>BottleneckBlock',
            nn_blocks.BottleneckBlockNBitQuantized,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        CustomLayerQuantize(
            'Beta>InvertedBottleneckBlock',
            nn_blocks.InvertedBottleneckBlockNBitQuantized,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
        CustomLayerQuantize(
            'Beta>Conv2DBNBlock',
            nn_blocks.Conv2DBNBlockNBitQuantized,
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation),
    ]
    return _ModelTransformer(
        model, transforms,
        set(layer_quantize_map.keys()),
        layer_quantize_map).transform()
class DefaultNBitQuantizeScheme(
    tfmot.quantization.keras.experimental.default_n_bit.DefaultNBitQuantizeScheme):
  """Default N-bit Scheme."""

  def __init__(self,
               num_bits_weight: int = 8,
               num_bits_activation: int = 8):
    super(DefaultNBitQuantizeScheme, self).__init__(
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def get_layout_transformer(self):
    return QuantizeLayoutTransform(
        num_bits_weight=self._num_bits_weight,
        num_bits_activation=self._num_bits_activation)
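The scheme defined above is what plugs these n-bit transforms into TF-MOT's QAT entry point. A hedged end-to-end sketch, not part of this commit: it assumes a Keras model containing the supported block types and assumes the `scheme` keyword of `tfmot.quantization.keras.quantize_apply` available in recent tensorflow_model_optimization releases; the helper name is hypothetical.

import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.projects.qat.vision.n_bit import schemes


def quantize_model_n_bit(model: tf.keras.Model,
                         num_bits_weight: int = 4,
                         num_bits_activation: int = 4) -> tf.keras.Model:
  """Annotates `model` and applies the n-bit QAT scheme defined above."""
  annotated = tfmot.quantization.keras.quantize_annotate_model(model)
  return tfmot.quantization.keras.quantize_apply(
      annotated,
      scheme=schemes.DefaultNBitQuantizeScheme(
          num_bits_weight=num_bits_weight,
          num_bits_activation=num_bits_activation))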
official/projects/qat/vision/quantization/__init__.py
0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Configs package definition."""

from official.projects.qat.vision.quantization import configs
from official.projects.qat.vision.quantization import schemes