"docs/vscode:/vscode.git/clone" did not exist on "572cd2e227fd0f1107d088676942ae7d39e676ce"
Commit 720d6e39 authored by Jaehong Kim, committed by A. Unique TensorFlower

Makes MV3 e2e quantized without any additional dequant op.

PiperOrigin-RevId: 430869666
parent dc804f33
@@ -435,7 +435,7 @@ class Conv2DBNBlockQuantized(tf.keras.layers.Layer):
     conv2d_quantized = _quantize_wrapped_layer(
         tf.keras.layers.Conv2D,
         configs.Default8BitConvQuantizeConfig(
-            ['kernel'], ['activation'], False))
+            ['kernel'], ['activation'], not self._use_normalization))
     self._conv0 = conv2d_quantized(
         filters=self._filters,
         kernel_size=self._kernel_size,
...
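The change above stops emitting a separate fake-quant node on the conv output when a normalization layer follows: the third argument of Default8BitConvQuantizeConfig (assumed here, by analogy with tfmot's Default8BitQuantizeConfig, to be a quantize-output flag) is now `not self._use_normalization`, so the conv output is quantized only when there is no BatchNorm after it. A minimal sketch of the wrapping, under that assumption and using tfmot's public QuantizeWrapperV2 in place of the module-private _quantize_wrapped_layer:

# Sketch only, not part of the commit.
import tensorflow as tf
import tensorflow_model_optimization as tfmot

from official.projects.qat.vision.quantization import configs


def make_quantized_conv2d(use_normalization: bool, **conv_kwargs):
  """Wraps Conv2D so its output is quantized only when no BN follows."""
  quantize_config = configs.Default8BitConvQuantizeConfig(
      ['kernel'], ['activation'], not use_normalization)
  return tfmot.quantization.keras.QuantizeWrapperV2(
      tf.keras.layers.Conv2D(**conv_kwargs), quantize_config)


# With a following BatchNorm the conv output keeps float range and the
# fake-quant step is applied after normalization instead.
conv_bn = make_quantized_conv2d(use_normalization=True, filters=8, kernel_size=3)
conv_only = make_quantized_conv2d(use_normalization=False, filters=8, kernel_size=3)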
@@ -21,6 +21,7 @@ import tensorflow as tf
 import tensorflow_model_optimization as tfmot
 from official.modeling import tf_utils
 from official.projects.qat.vision.quantization import configs
+from official.projects.qat.vision.quantization import helper
 from official.vision.beta.modeling.decoders import aspp
 from official.vision.beta.modeling.layers import nn_layers
@@ -61,7 +62,9 @@ def _quantize_wrapped_layer(cls, quantize_config):
 @tf.keras.utils.register_keras_serializable(package='Vision')
-class SqueezeExcitationQuantized(tf.keras.layers.Layer):
+class SqueezeExcitationQuantized(
+    helper.LayerQuantizerHelper,
+    tf.keras.layers.Layer):
   """Creates a squeeze and excitation layer."""

   def __init__(self,
@@ -129,9 +132,8 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
     # Convert hard_sigmoid activation to quantizable keras layers so each op
     # can be properly quantized.
     # Formula is hard_sigmoid(x) = relu6(x + 3) * 0.16667.
-    self._add = tfmot.quantization.keras.QuantizeWrapperV2(
-        tf.keras.layers.Add(), configs.Default8BitQuantizeConfig([], [],
-                                                                 True))
+    self._add_quantizer('add_three')
+    self._add_quantizer('divide_six')
     self._relu6 = tfmot.quantization.keras.QuantizeWrapperV2(
         tf_utils.get_activation('relu6', use_keras_layer=True),
         configs.Default8BitActivationQuantizeConfig())
@@ -141,11 +143,12 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
         self._gating_activation, use_keras_layer=True),
         configs.Default8BitActivationQuantizeConfig())

-  def _apply_gating_activation_layer(self, x: tf.Tensor) -> tf.Tensor:
+  def _apply_gating_activation_layer(
+      self, x: tf.Tensor, training: bool) -> tf.Tensor:
     if self._gating_activation == 'hard_sigmoid':
-      x = self._add([x, 3.0 * tf.ones_like(x)])
+      x = self._apply_quantizer('add_three', x + 3.0, training)
       x = self._relu6(x)
-      x = self._multiply([x, 0.16667 * tf.ones_like(x)])
+      x = self._apply_quantizer('divide_six', x * 0.16667, training)
     else:
       x = self._gating_activation_layer(x)
     return x
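For reference, the decomposition used above matches the MobileNetV3 form of hard_sigmoid, relu6(x + 3) / 6, with a fake-quant step after the add ('add_three') and after the scale ('divide_six'). A quick standalone check, not part of the commit:

# Sketch only: confirms relu6(x + 3) * 0.16667 equals relu6(x + 3) / 6
# up to the rounding of the 0.16667 constant.
import tensorflow as tf

x = tf.linspace(-6.0, 6.0, 25)
decomposed = tf.nn.relu6(x + 3.0) * 0.16667  # add_three -> relu6 -> divide_six
reference = tf.nn.relu6(x + 3.0) / 6.0       # MobileNetV3 hard_sigmoid
print(float(tf.reduce_max(tf.abs(decomposed - reference))))  # ~2e-5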
@@ -200,6 +203,7 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
         configs.Default8BitActivationQuantizeConfig())
     self._create_gating_activation_layer()
+    self._build_quantizer_vars()

     super().build(input_shape)

   def get_config(self):
@@ -224,7 +228,7 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
     x = self._reduce_mean_quantizer(
         x, training, self._reduce_mean_quantizer_vars)
     x = self._activation_layer(self._se_reduce(x))
-    x = self._apply_gating_activation_layer(self._se_expand(x))
+    x = self._apply_gating_activation_layer(self._se_expand(x), training)
     x = self._multiply([x, inputs])
     return x
...
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Quantization helpers."""
import tensorflow_model_optimization as tfmot
class LayerQuantizerHelper(object):
  """Helper class that handles quantizers."""

  def __init__(self, *args, **kwargs):
    self._quantizers = {}
    self._quantizer_vars = {}
    super().__init__(*args, **kwargs)

  def _all_value_quantizer(self):
    return tfmot.quantization.keras.quantizers.AllValuesQuantizer(
        num_bits=8, per_axis=False, symmetric=False, narrow_range=False)

  def _moving_average_quantizer(self):
    return tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
        num_bits=8, per_axis=False, symmetric=False, narrow_range=False)

  def _add_quantizer(self, name, all_value_quantizer=False):
    if all_value_quantizer:
      self._quantizers[name] = self._all_value_quantizer()
    else:
      self._quantizers[name] = self._moving_average_quantizer()

  def _apply_quantizer(self, name, inputs, training, **kwargs):
    return self._quantizers[name](
        inputs, training, self._quantizer_vars[name], **kwargs)

  def _build_quantizer_vars(self):
    for name in self._quantizers:
      self._quantizer_vars[name] = self._quantizers[name].build(
          tensor_shape=None, name=name, layer=self)
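A minimal usage sketch for the helper (the layer below is hypothetical, not part of the commit; the pattern mirrors SqueezeExcitationQuantized above): register named quantizers in __init__, materialize their min/max variables in build() via _build_quantizer_vars(), and wrap intermediate tensors with _apply_quantizer() in call().

# Sketch only, assuming the LayerQuantizerHelper defined above is importable.
import tensorflow as tf


class HardSigmoidQuantized(LayerQuantizerHelper, tf.keras.layers.Layer):
  """Hypothetical layer: quantized relu6(x + 3) * 0.16667."""

  def __init__(self, **kwargs):
    super().__init__(**kwargs)
    # Moving-average quantizers by default; pass all_value_quantizer=True
    # to use an AllValuesQuantizer instead.
    self._add_quantizer('add_three')
    self._add_quantizer('divide_six')

  def build(self, input_shape):
    # Creates the <name>_min / <name>_max variables on this layer.
    self._build_quantizer_vars()
    super().build(input_shape)

  def call(self, inputs, training=None):
    if training is None:
      training = False
    x = self._apply_quantizer('add_three', inputs + 3.0, training)
    x = tf.nn.relu6(x)
    return self._apply_quantizer('divide_six', x * 0.16667, training)

The resulting variables surface as add_three_min/add_three_max and divide_six_min/divide_six_max, which is why those names are added to _QUANTIZATION_WEIGHT_NAMES in the hunk below.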
@@ -33,6 +33,8 @@ LayerPattern = tfmot.quantization.keras.graph_transformations.transforms.LayerPattern
 _QUANTIZATION_WEIGHT_NAMES = [
     'output_max', 'output_min', 'optimizer_step',
     'kernel_min', 'kernel_max',
+    'add_three_min', 'add_three_max',
+    'divide_six_min', 'divide_six_max',
     'depthwise_kernel_min', 'depthwise_kernel_max',
     'reduce_mean_quantizer_vars_min', 'reduce_mean_quantizer_vars_max']
...