Commit 32e4ca51 authored by qianyj's avatar qianyj
Browse files

Update code to v2.11.0

parents 9485aa1d 71060f67
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for MobileNet."""
# Import libraries
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -18,6 +18,8 @@ import inspect
from typing import Any, MutableMapping, Optional, Union, Tuple
import tensorflow as tf
from official.modeling import tf_utils
class GroupConv2D(tf.keras.layers.Conv2D):
"""2D group convolution as a Keras Layer."""
......@@ -168,7 +170,7 @@ class GroupConv2D(tf.keras.layers.Conv2D):
self.add_weight(
name='kernel_{}'.format(g),
shape=self.group_kernel_shape,
initializer=self.kernel_initializer,
initializer=tf_utils.clone_initializer(self.kernel_initializer),
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
trainable=True,
......@@ -178,7 +180,7 @@ class GroupConv2D(tf.keras.layers.Conv2D):
self.add_weight(
name='bias_{}'.format(g),
shape=(self.group_output_channel,),
initializer=self.bias_initializer,
initializer=tf_utils.clone_initializer(self.bias_initializer),
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
trainable=True,
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -670,7 +670,7 @@ class SegClassNet(tf.keras.layers.Layer):
self.min_level = min_level
self.max_level = max_level
self.fullres_output = fullres_output
self.fullres_conv_transpose = fullres_skip_connections
self.fullres_skip_connections = fullres_skip_connections
self.fnode = FNode(
0, # Always use the first level with highest resolution.
......@@ -726,7 +726,7 @@ class SegClassNet(tf.keras.layers.Layer):
if self.fullres_output:
for i in reversed(range(self.min_level)):
if self.config.fullres_skip_connections:
if self.fullres_skip_connections:
net = tf.keras.layers.Concatenate()([net, backbone_feats[i + 1]])
net = self.fullres_conv[str(i)](net)
net = self.fullres_conv_transpose[str(i)](net)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -26,6 +26,8 @@ from official.modeling.hyperparams import oneof
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import custom_layers
InitializerType = Optional[Union[str, tf.keras.initializers.Initializer]]
@dataclasses.dataclass
class BlockType(oneof.OneOfConfig):
......@@ -216,6 +218,8 @@ class ModelConfig(base_config.Config):
stem_base_filters: int = 64
stem_kernel_size: int = 5
top_base_filters: int = 1280
conv_kernel_initializer: InitializerType = None
dense_kernel_initializer: InitializerType = None
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
# expand_ratio, strides, se_ratio, id_skip, fused_conv, conv_type)
......@@ -279,7 +283,8 @@ def mobilenet_edgetpu_v2_base(
drop_connect_rate: float = 0.1,
filter_size_overrides: Optional[Dict[int, int]] = None,
block_op_overrides: Optional[Dict[int, Dict[int, Dict[str, Any]]]] = None,
block_group_overrides: Optional[Dict[int, Dict[str, Any]]] = None):
block_group_overrides: Optional[Dict[int, Dict[str, Any]]] = None,
topology: Optional[TopologyConfig] = None):
"""Creates MobilenetEdgeTPUV2 ModelConfig based on tuning parameters."""
config = ModelConfig()
......@@ -295,7 +300,7 @@ def mobilenet_edgetpu_v2_base(
}
config = config.replace(**param_overrides)
topology_config = TopologyConfig()
topology_config = TopologyConfig() if topology is None else topology
if filter_size_overrides:
for group_id in filter_size_overrides:
topology_config.block_groups[group_id].filters = filter_size_overrides[
......@@ -724,6 +729,7 @@ def conv2d_block_as_layers(
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
kernel_initializer: InitializerType = None,
name: Optional[str] = None) -> List[tf.keras.layers.Layer]:
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
......@@ -748,11 +754,13 @@ def conv2d_block_as_layers(
sequential_layers: List[tf.keras.layers.Layer] = []
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
init_kwargs.update({'depthwise_initializer': kernel_initializer})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER})
init_kwargs.update({
'filters': conv_filters,
'kernel_initializer': kernel_initializer
})
sequential_layers.append(conv2d(**init_kwargs))
......@@ -780,12 +788,21 @@ def conv2d_block(inputs: tf.Tensor,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
kernel_initializer: Optional[InitializerType] = None,
name: Optional[str] = None) -> tf.Tensor:
"""Compatibility with third_party/car/deep_nets."""
x = inputs
for layer in conv2d_block_as_layers(conv_filters, config, kernel_size,
strides, use_batch_norm, use_bias,
activation, depthwise, name):
for layer in conv2d_block_as_layers(
conv_filters=conv_filters,
config=config,
kernel_size=kernel_size,
strides=strides,
use_batch_norm=use_batch_norm,
use_bias=use_bias,
activation=activation,
depthwise=depthwise,
kernel_initializer=kernel_initializer,
name=name):
x = layer(x)
return x
......@@ -828,6 +845,9 @@ class _MbConvBlock:
use_groupconv = block.conv_type == 'group'
prefix = prefix or ''
self.name = prefix
conv_kernel_initializer = (
config.conv_kernel_initializer if config.conv_kernel_initializer
is not None else CONV_KERNEL_INITIALIZER)
filters = block.input_filters * block.expand_ratio
......@@ -851,21 +871,25 @@ class _MbConvBlock:
activation=activation,
name=prefix + 'fused'))
else:
self.expand_block.extend(conv2d_block_as_layers(
filters,
config,
self.expand_block.extend(
conv2d_block_as_layers(
conv_filters=filters,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'fused'))
else:
if block.expand_ratio != 1:
# Expansion phase with a pointwise conv
self.expand_block.extend(conv2d_block_as_layers(
filters,
config,
self.expand_block.extend(
conv2d_block_as_layers(
conv_filters=filters,
config=config,
kernel_size=(1, 1),
activation=activation,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'expand'))
# Main kernel, after the expansion (if applicable, i.e. not fused).
......@@ -876,6 +900,7 @@ class _MbConvBlock:
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
kernel_initializer=conv_kernel_initializer,
depthwise=True,
name=prefix + 'depthwise'))
elif use_groupconv:
......@@ -907,27 +932,30 @@ class _MbConvBlock:
tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape'))
self.squeeze_excitation.extend(
conv2d_block_as_layers(
num_reduced_filters,
config,
conv_filters=num_reduced_filters,
config=config,
use_bias=True,
use_batch_norm=False,
activation=activation,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'se_reduce'))
self.squeeze_excitation.extend(
conv2d_block_as_layers(
filters,
config,
conv_filters=filters,
config=config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
kernel_initializer=conv_kernel_initializer,
name=prefix + 'se_expand'))
# Output phase
self.project_block.extend(
conv2d_block_as_layers(
block.output_filters,
config,
conv_filters=block.output_filters,
config=config,
activation=None,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'project'))
# Add identity so that quantization-aware training can insert quantization
......@@ -993,6 +1021,12 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
conv_kernel_initializer = (
config.conv_kernel_initializer if config.conv_kernel_initializer
is not None else CONV_KERNEL_INITIALIZER)
dense_kernel_initializer = (
config.dense_kernel_initializer if config.dense_kernel_initializer
is not None else DENSE_KERNEL_INITIALIZER)
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
......@@ -1010,12 +1044,13 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
# Build stem
x = conv2d_block(
x,
round_filters(stem_base_filters, config),
config,
inputs=x,
conv_filters=round_filters(stem_base_filters, config),
config=config,
kernel_size=[stem_kernel_size, stem_kernel_size],
strides=[2, 2],
activation=activation,
kernel_initializer=conv_kernel_initializer,
name='stem')
# Build blocks
......@@ -1061,10 +1096,12 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
if config.backbone_only:
return backbone_levels
# Build top
x = conv2d_block(x,
round_filters(top_base_filters, config),
config,
x = conv2d_block(
inputs=x,
conv_filters=round_filters(top_base_filters, config),
config=config,
activation=activation,
kernel_initializer=conv_kernel_initializer,
name='top')
# Build classifier
......@@ -1075,7 +1112,7 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
x = tf.keras.layers.Conv2D(
num_classes,
1,
kernel_initializer=DENSE_KERNEL_INITIALIZER,
kernel_initializer=dense_kernel_initializer,
kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
bias_regularizer=tf.keras.regularizers.l2(weight_decay),
name='logits')(
......
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilenet_edgetpu_v2_model_blocks."""
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import custom_layers
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v2_model_blocks
class MobilenetEdgetpuV2ModelBlocksTest(tf.test.TestCase):
  """Tests for building models out of MobilenetEdgeTPUV2 model blocks."""

  def setUp(self):
    super().setUp()
    self.model_config = mobilenet_edgetpu_v2_model_blocks.ModelConfig()

  def _build_test_model(self):
    """Builds a 224x224x1 keras Model from the current self.model_config."""
    inputs = tf.keras.layers.Input(shape=(224, 224, 1))
    outputs = mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2(
        image_input=inputs,
        config=self.model_config)
    return tf.keras.Model(inputs=inputs, outputs=outputs)

  def test_model_creatation(self):
    test_model = self._build_test_model()
    self.assertIsInstance(test_model, tf.keras.Model)
    self.assertEqual(test_model.input.shape, (None, 224, 224, 1))
    self.assertEqual(test_model.output.shape, (None, 1001))

  def test_model_with_customized_kernel_initializer(self):
    self.model_config.conv_kernel_initializer = 'he_uniform'
    self.model_config.dense_kernel_initializer = 'glorot_normal'
    test_model = self._build_test_model()
    conv_types = (tf.keras.layers.Conv2D, tf.keras.layers.DepthwiseConv2D,
                  custom_layers.GroupConv2D)
    conv_layer_stack = [
        layer for layer in test_model.layers if isinstance(layer, conv_types)
    ]
    self.assertGreater(len(conv_layer_stack), 2)
    # Every conv except the last one (which acts as the Dense classifier head)
    # should pick up the customized conv initializer. Branch order matters:
    # GroupConv2D and DepthwiseConv2D are Conv2D subclasses.
    for layer in conv_layer_stack[:-1]:
      if isinstance(layer, custom_layers.GroupConv2D):
        self.assertIsInstance(layer.kernel_initializer,
                              tf.keras.initializers.GlorotUniform)
      elif isinstance(layer, tf.keras.layers.Conv2D):
        self.assertIsInstance(layer.kernel_initializer,
                              tf.keras.initializers.HeUniform)
      elif isinstance(layer, tf.keras.layers.DepthwiseConv2D):
        self.assertIsInstance(layer.depthwise_initializer,
                              tf.keras.initializers.HeUniform)
    self.assertIsInstance(conv_layer_stack[-1].kernel_initializer,
                          tf.keras.initializers.GlorotNormal)


if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MultiHeadAttention layer optimized for EdgeTPU.
Compared to tf.keras.layers.MultiHeadAttention, this layer performs query-key
multiplication instead of key-query multiplication to remove an unnecessary
transpose.
"""
import math
import string
from typing import Optional, Tuple
import numpy as np
import tensorflow as tf
# Pool of lowercase letters used as einsum index symbols below.
_CHR_IDX = string.ascii_lowercase
def _build_attention_equation(
    rank: int, attn_axes: Tuple[int, ...]) -> Tuple[str, str, int]:
  """Builds einsum equations for the attention computation.

  Query, key, value inputs after projection are expected to have the shape as:
  `(bs, <non-attention dims>, <attention dims>, num_heads, channels)`.
  `bs` and `<non-attention dims>` are treated as `<batch dims>`.

  The attention operations can be generalized:
  (1) Query-key dot product:
  `(<batch dims>, <query attention dims>, num_heads, channels), (<batch dims>,
  <key attention dims>, num_heads, channels) -> (<batch dims>,
  num_heads, <query attention dims>, <key attention dims>)`
  (2) Combination:
  `(<batch dims>, num_heads, <query attention dims>, <key attention dims>),
  (<batch dims>, <value attention dims>, num_heads, channels) -> (<batch
  dims>, <query attention dims>, num_heads, channels)`

  Args:
    rank: Rank of query, key, value tensors.
    attn_axes: List/tuple of axes, `[-1, rank)`, that attention will be
      applied to.

  Returns:
    Einsum equations.
  """
  letters = string.ascii_lowercase
  target_notation = letters[:rank]
  # `batch_dims` includes the head dim.
  batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,)))
  # Source (key/value) reuses the target letter on batch/head axes and gets a
  # fresh letter on each attention axis.
  next_letter = rank
  source_chars = []
  for axis in range(rank):
    if axis in batch_dims or axis == rank - 1:
      source_chars.append(target_notation[axis])
    else:
      source_chars.append(letters[next_letter])
      next_letter += 1
  source_notation = "".join(source_chars)
  product_notation = "".join(
      [target_notation[i] for i in batch_dims] +
      [target_notation[i] for i in attn_axes] +
      [source_notation[i] for i in attn_axes])
  # Query comes first: query-key (not key-query) dot product.
  dot_product_equation = "%s,%s->%s" % (
      target_notation,
      source_notation,
      product_notation,
  )
  combine_equation = "%s,%s->%s" % (
      product_notation,
      source_notation,
      target_notation,
  )
  return dot_product_equation, combine_equation, len(product_notation)
class OptimizedMultiHeadAttention(tf.keras.layers.MultiHeadAttention):
  """MultiHeadAttention with query-key multiplication.

  Currently, this layer only works for self-attention but not for
  cross-attention. TODO(b/243166060).
  """

  def _build_attention(self, rank: int) -> None:
    """Builds multi-head dot-product attention computations.

    This function builds attributes necessary for `_compute_attention` to
    customize attention computation to replace the default dot-product
    attention.

    Args:
      rank: the rank of query, key, value tensors.
    """
    # Default: attend over every axis between batch and (heads, channels).
    attention_axes = self._attention_axes
    if attention_axes is None:
      attention_axes = range(1, rank - 2)
    self._attention_axes = tuple(attention_axes)
    equations = _build_attention_equation(
        rank, attn_axes=self._attention_axes)
    self._dot_product_equation, self._combine_equation, scores_rank = equations
    # Softmax normalizes over the trailing key-attention axes of the scores.
    num_attn_axes = len(self._attention_axes)
    norm_axes = tuple(range(scores_rank - num_attn_axes, scores_rank))
    self._softmax = tf.keras.layers.Softmax(axis=norm_axes)
    self._dropout_layer = tf.keras.layers.Dropout(rate=self._dropout)

  def _compute_attention(
      self,
      query: tf.Tensor,
      key: tf.Tensor,
      value: tf.Tensor,
      attention_mask: Optional[tf.Tensor] = None,
      training: Optional[bool] = None) -> Tuple[tf.Tensor, tf.Tensor]:
    """Applies Dot-product attention with query, key, value tensors.

    This function defines the computation inside `call` with projected
    multi-head Q, K, V inputs. Users can override this function for
    customized attention implementation.

    Args:
      query: Projected query `Tensor` of shape `(B, T, N, key_dim)`.
      key: Projected key `Tensor` of shape `(B, S, N, key_dim)`.
      value: Projected value `Tensor` of shape `(B, S, N, value_dim)`.
      attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
        attention to certain positions. It is generally not needed if the
        `query` and `value` (and/or `key`) are masked.
      training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).

    Returns:
      attention_output: Multi-headed outputs of attention computation.
      attention_scores: Multi-headed attention weights.
    """
    # Scaling the smaller einsum operand improves XLA performance, but may
    # introduce slight numeric differences in the Transformer attention head.
    scaled_query = tf.multiply(query, 1.0 / math.sqrt(float(self._key_dim)))

    # Raw attention logits from the query-key dot product.
    raw_scores = tf.einsum(self._dot_product_equation, scaled_query, key)
    attention_scores = self._masked_softmax(raw_scores, attention_mask)

    # This drops out entire tokens to attend to, which might seem a bit
    # unusual, but is taken from the original Transformer paper.
    dropped_scores = self._dropout_layer(attention_scores, training=training)

    # `attention_output` = [B, T, N, H]
    attention_output = tf.einsum(self._combine_equation, dropped_scores,
                                 value)
    return attention_output, attention_scores
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for optimized_multiheadattention_layer."""
import numpy as np
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import optimized_multiheadattention_layer
# Tensor dimensions shared by the comparison test below.
_BATCH_SIZE = 32
_SEQ_LEN = 4
_EMBEDDING_SIZE = 8
_NUM_HEADS = 2
_KEY_DIM = 2
class OptimizedMultiheadattentionLayerTest(tf.test.TestCase):
  """Checks OptimizedMultiHeadAttention against the stock keras layer."""

  def test_same_output(self):
    """Tests that OptimizedMultiHeadAttention returns the expected outputs."""
    input_tensor_1 = tf.random.uniform((_BATCH_SIZE, _SEQ_LEN, _EMBEDDING_SIZE))
    input_tensor_2 = tf.random.uniform((_BATCH_SIZE, _SEQ_LEN, _EMBEDDING_SIZE))

    # Instantiate both layers and call them once so their weights are built.
    orig_layer = tf.keras.layers.MultiHeadAttention(
        num_heads=_NUM_HEADS, key_dim=_KEY_DIM)
    _ = orig_layer(input_tensor_1, input_tensor_2)
    opt_layer = optimized_multiheadattention_layer.OptimizedMultiHeadAttention(
        num_heads=_NUM_HEADS, key_dim=_KEY_DIM)
    _ = opt_layer(input_tensor_1, input_tensor_2)

    # Draw a single random set of projection weights and install it in both
    # layers so their outputs are directly comparable.
    proj_kernel_shape = (_EMBEDDING_SIZE, _NUM_HEADS, _KEY_DIM)
    proj_bias_shape = (_NUM_HEADS, _KEY_DIM)
    qkv_weights = {
        dense_name: [
            np.random.uniform(size=proj_kernel_shape),
            np.random.uniform(size=proj_bias_shape),
        ] for dense_name in ('_query_dense', '_key_dense', '_value_dense')
    }
    output_weights = [
        np.random.uniform(size=(_NUM_HEADS, _KEY_DIM, _EMBEDDING_SIZE)),
        np.random.uniform(size=(_EMBEDDING_SIZE,)),
    ]
    for layer in (orig_layer, opt_layer):
      for dense_name, dense_weights in qkv_weights.items():
        getattr(layer, dense_name).set_weights(dense_weights)
      layer._output_dense.set_weights(output_weights)

    # With identical weights and inputs, outputs and scores must match.
    orig_attn_output, orig_attn_score = orig_layer(
        input_tensor_1, input_tensor_2, return_attention_scores=True)
    opt_attn_output, opt_attn_score = opt_layer(
        input_tensor_1, input_tensor_2, return_attention_scores=True)
    self.assertAllClose(orig_attn_output, opt_attn_output)
    self.assertAllClose(orig_attn_score, opt_attn_score)


if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -60,6 +60,8 @@ flags.DEFINE_integer(
'image_size', 224,
'Size of the input image. Ideally should be the same as the image_size used '
'in training config.')
flags.DEFINE_bool(
'fix_batch_size', True, 'Whether to export model with fixed batch size.')
flags.DEFINE_string(
'output_layer', None,
'Layer name to take the output from. Can be used to take the output from '
......@@ -146,9 +148,11 @@ def run_export():
output_layer = model.get_layer(export_config.output_layer)
model = tf.keras.Model(model.input, output_layer.output)
batch_size = 1 if FLAGS.fix_batch_size else None
model_input = tf.keras.Input(
shape=(export_config.image_size, export_config.image_size, 3),
batch_size=1)
batch_size=batch_size)
model_output = export_util.finalize_serving(model(model_input), export_config)
model_for_inference = tf.keras.Model(model_input, model_output)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,7 +31,7 @@ from official.projects.edgetpu.vision.modeling import custom_layers
from official.projects.edgetpu.vision.modeling.backbones import mobilenet_edgetpu
from official.projects.edgetpu.vision.tasks import image_classification
from official.projects.edgetpu.vision.tasks import semantic_segmentation as edgetpu_semantic_segmentation
from official.vision.beta.tasks import semantic_segmentation
from official.vision.tasks import semantic_segmentation
# pylint: enable=unused-import
MEAN_RGB = [127.5, 127.5, 127.5]
......@@ -107,6 +107,12 @@ class ExportConfig(base_config.Config):
def finalize_serving(model_output, export_config):
"""Adds extra layers based on the provided configuration."""
if isinstance(model_output, dict):
return {
key: finalize_serving(model_output[key], export_config)
for key in model_output
}
finalize_method = export_config.finalize_method
output_layer = model_output
if not finalize_method or finalize_method[0] == 'none':
......@@ -183,8 +189,7 @@ def representative_dataset_gen(export_config):
"""Gets a python generator of numpy arrays for the given dataset."""
quantization_config = export_config.quantization_config
dataset = tfds.builder(
quantization_config.dataset_name,
data_dir=quantization_config.dataset_dir)
quantization_config.dataset_name, try_gcs=True)
dataset.download_and_prepare()
data = dataset.as_dataset()[quantization_config.dataset_split]
iterator = data.as_numpy_iterator()
......@@ -201,7 +206,8 @@ def configure_tflite_converter(export_config, converter):
"""Common code for picking up quantization parameters."""
quantization_config = export_config.quantization_config
if quantization_config.quantize:
if quantization_config.dataset_dir is None:
if (quantization_config.dataset_dir is
None) and (quantization_config.dataset_name is None):
raise ValueError(
'Must provide a representative dataset when quantizing the model.')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment