Commit b2c3a9ba authored by A. Unique TensorFlower, committed by saberkun

Internal change

PiperOrigin-RevId: 404080616
parent ca3d3920
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of mobilenet_edgetpu_v2 Networks."""
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.projects.edgetpu.vision.modeling.mobilenet_edgetpu_v1_model import MobilenetEdgeTPU
from official.projects.edgetpu.vision.modeling.mobilenet_edgetpu_v2_model import MobilenetEdgeTPUV2
from official.vision.beta.modeling.backbones import factory
layers = tf.keras.layers
# MobileNet-EdgeTPU-V2 configs.
MOBILENET_EDGETPU_V2_CONFIGS = frozenset([
'mobilenet_edgetpu_v2_tiny',
'mobilenet_edgetpu_v2_xs',
'mobilenet_edgetpu_v2_s',
'mobilenet_edgetpu_v2_m',
'mobilenet_edgetpu_v2_l',
'autoseg_edgetpu_backbone_xs',
'autoseg_edgetpu_backbone_s',
'autoseg_edgetpu_backbone_m',
])
# MobileNet-EdgeTPU-V1 configs.
MOBILENET_EDGETPU_CONFIGS = frozenset([
'mobilenet_edgetpu',
'mobilenet_edgetpu_dm1p25',
'mobilenet_edgetpu_dm1p5',
'mobilenet_edgetpu_dm1p75',
])
def freeze_large_filters(model: tf.keras.Model, threshold: int):
"""Freezes layer with large number of filters."""
for layer in model.layers:
if isinstance(layer.output_shape, tuple):
filter_size = layer.output_shape[-1]
if filter_size >= threshold:
logging.info('Freezing layer: %s', layer.name)
layer.trainable = False
@factory.register_backbone_builder('mobilenet_edgetpu')
def build_mobilenet_edgetpu(input_specs: tf.keras.layers.InputSpec,
backbone_config: hyperparams.Config,
**unused_kwargs) -> tf.keras.Model:
"""Builds MobileNetEdgeTpu backbone from a config."""
backbone_type = backbone_config.type
backbone_cfg = backbone_config.get()
assert backbone_type == 'mobilenet_edgetpu', (f'Inconsistent backbone type '
f'{backbone_type}')
if backbone_cfg.model_id in MOBILENET_EDGETPU_V2_CONFIGS:
model = MobilenetEdgeTPUV2.from_name(
model_name=backbone_cfg.model_id,
overrides={
'batch_norm': 'tpu',
'rescale_input': False,
'resolution': input_specs.shape[1:3],
'backbone_only': True,
'features_as_dict': True,
'dtype': 'bfloat16'
},
model_weights_path=backbone_cfg.pretrained_checkpoint_path)
if backbone_cfg.freeze_large_filters:
freeze_large_filters(model, backbone_cfg.freeze_large_filters)
return model
elif backbone_cfg.model_id in MOBILENET_EDGETPU_CONFIGS:
model = MobilenetEdgeTPU.from_name(
model_name=backbone_cfg.model_id,
overrides={
'batch_norm': 'tpu',
'rescale_input': False,
'resolution': input_specs.shape[1:3],
'backbone_only': True,
'dtype': 'bfloat16'
},
model_weights_path=backbone_cfg.pretrained_checkpoint_path)
if backbone_cfg.freeze_large_filters:
freeze_large_filters(model, backbone_cfg.freeze_large_filters)
return model
else:
    raise ValueError(f'Unsupported model_id {backbone_cfg.model_id}.')
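# Usage sketch (illustrative, not part of the original change): exercising the
# registered builder directly. The stub config below is an assumption that
# mirrors the TestBackboneConfig in the unit test further down; real pipelines
# pass a hyperparams.Config. Relies on this module's existing imports.
class _StubBackboneConfig:
  """Hypothetical config stand-in, for illustration only."""

  def __init__(self, model_id):
    self.type = 'mobilenet_edgetpu'
    self.model_id = model_id
    self.freeze_large_filters = None
    self.pretrained_checkpoint_path = None

  def get(self):
    return self


backbone = build_mobilenet_edgetpu(
    input_specs=tf.keras.layers.InputSpec(shape=(1, 224, 224, 3)),
    backbone_config=_StubBackboneConfig('mobilenet_edgetpu_v2_s'))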
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for MobileNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.modeling.backbones import mobilenet_edgetpu
class TestInputSpec:
def __init__(self, shape):
self.shape = shape
class TestBackboneConfig:
def __init__(self, model_id):
self.model_id = model_id
self.freeze_large_filters = 99
self.pretrained_checkpoint_path = None
self.type = 'mobilenet_edgetpu'
def get(self):
return self
class MobileNetEdgeTPUTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('mobilenet_edgetpu_v2_s', (1, 512, 512, 3)),
('mobilenet_edgetpu_v2_l', (1, None, None, 3)),
('mobilenet_edgetpu', (1, 512, 512, 3)),
('mobilenet_edgetpu_dm1p25', (1, None, None, 3)),
)
def test_mobilenet_creation(self, model_id, input_shape):
"""Test creation of MobileNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
test_model = mobilenet_edgetpu.build_mobilenet_edgetpu(
input_specs=TestInputSpec(input_shape),
backbone_config=TestBackboneConfig(model_id))
self.assertGreater(len(test_model.outputs), 1)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common modeling utilities."""
from typing import Optional, Tuple
# Import libraries
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.python.tpu import tpu_function # pylint: disable=g-direct-tensorflow-import
MEAN_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
STDDEV_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
@tf.keras.utils.register_keras_serializable(package='Vision')
class TpuBatchNormalization(tf.keras.layers.BatchNormalization):
"""Cross replica batch normalization."""
def __init__(self, fused: Optional[bool] = False, **kwargs):
if fused in (True, None):
raise ValueError('TpuBatchNormalization does not support fused=True.')
super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs)
def _cross_replica_average(self, t: tf.Tensor, num_shards_per_group: int):
"""Calculates the average value of input tensor across TPU replicas."""
num_shards = tpu_function.get_tpu_context().number_of_shards
group_assignment = None
if num_shards_per_group > 1:
if num_shards % num_shards_per_group != 0:
raise ValueError(
'num_shards: %d mod shards_per_group: %d, should be 0' %
(num_shards, num_shards_per_group))
num_groups = num_shards // num_shards_per_group
group_assignment = [[
x for x in range(num_shards) if x // num_shards_per_group == y
] for y in range(num_groups)]
return tf1.tpu.cross_replica_sum(t, group_assignment) / tf.cast(
num_shards_per_group, t.dtype)
  def _moments(self, inputs: tf.Tensor, reduction_axes, keep_dims: bool):
    """Computes mean and variance, overriding the parent's `_moments`."""
shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments(
inputs, reduction_axes, keep_dims=keep_dims)
num_shards = tpu_function.get_tpu_context().number_of_shards or 1
if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices.
num_shards_per_group = 1
else:
num_shards_per_group = max(8, num_shards // 8)
if num_shards_per_group > 1:
# Compute variance using: Var[X]= E[X^2] - E[X]^2.
shard_square_of_mean = tf.math.square(shard_mean)
shard_mean_of_square = shard_variance + shard_square_of_mean
group_mean = self._cross_replica_average(shard_mean, num_shards_per_group)
group_mean_of_square = self._cross_replica_average(
shard_mean_of_square, num_shards_per_group)
group_variance = group_mean_of_square - tf.math.square(group_mean)
return (group_mean, group_variance)
else:
return (shard_mean, shard_variance)
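# Sanity-check sketch (illustrative, not part of the original change): the
# identity Var[X] = E[X^2] - E[X]^2 used above recovers the global moments
# from equally sized per-shard moments. Plain NumPy, made-up numbers.
shards = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])]
shard_means = [s.mean() for s in shards]
shard_mean_of_squares = [s.var() + s.mean()**2 for s in shards]
group_mean = np.mean(shard_means)
group_variance = np.mean(shard_mean_of_squares) - group_mean**2
assert np.isclose(group_mean, np.concatenate(shards).mean())
assert np.isclose(group_variance, np.concatenate(shards).var())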
def get_batch_norm(batch_norm_type: str) -> tf.keras.layers.BatchNormalization:
"""A helper to create a batch normalization getter.
Args:
batch_norm_type: The type of batch normalization layer implementation. `tpu`
will use `TpuBatchNormalization`.
  Returns:
    A `tf.keras.layers.BatchNormalization` subclass (a class, not an
    instance).
"""
if batch_norm_type == 'tpu':
return TpuBatchNormalization
return tf.keras.layers.BatchNormalization # pytype: disable=bad-return-type # typed-keras
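# Sketch (illustrative): 'tpu' selects the cross-replica implementation
# defined above; any other value falls back to the stock Keras layer. Both
# are classes, instantiated like any Keras layer.
bn_cls = get_batch_norm('tpu')
assert bn_cls is TpuBatchNormalization
bn_layer = bn_cls(momentum=0.99, epsilon=1e-3)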
def count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([tf.keras.backend.count_params(p)
for p in model.trainable_weights]))
def load_weights(model: tf.keras.Model,
model_weights_path: str,
checkpoint_format: str = 'tf_checkpoint'):
"""Load model weights from the given file path.
Args:
model: the model to load weights into
model_weights_path: the path of the model weights
checkpoint_format: The source of checkpoint files. By default, we assume the
checkpoint is saved by tf.train.Checkpoint().save(). For legacy reasons,
      we can also restore a checkpoint saved by Keras model.save_weights() by
setting checkpoint_format = 'keras_checkpoint'.
"""
if checkpoint_format == 'tf_checkpoint':
checkpoint_dict = {'model': model}
checkpoint = tf.train.Checkpoint(**checkpoint_dict)
checkpoint.restore(model_weights_path).assert_existing_objects_matched()
elif checkpoint_format == 'keras_checkpoint':
    # The assert makes sure the restore is successful.
model.load_weights(model_weights_path).assert_existing_objects_matched()
else:
raise ValueError(f'Unsupported checkpoint format {checkpoint_format}.')
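# Usage sketch (paths below are placeholders, not real checkpoints):
#
#   load_weights(model, '/tmp/ckpt/ckpt-123')  # tf.train.Checkpoint().save()
#   load_weights(model, '/tmp/keras_ckpt/weights',
#                checkpoint_format='keras_checkpoint')  # model.save_weights()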
def normalize_images(
features: tf.Tensor,
num_channels: int = 3,
dtype: str = 'float32',
data_format: str = 'channels_last',
mean_rgb: Tuple[float, ...] = MEAN_RGB,
stddev_rgb: Tuple[float, ...] = STDDEV_RGB,
) -> tf.Tensor:
"""Normalizes the input image channels with the given mean and stddev.
Args:
features: `Tensor` representing decoded images in float format.
num_channels: the number of channels in the input image tensor.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
data_format: the format of the input image tensor ['channels_first',
'channels_last'].
mean_rgb: the mean of the channels to subtract.
stddev_rgb: the stddev of the channels to divide.
Returns:
A normalized image `Tensor`.
"""
if data_format == 'channels_first':
stats_shape = [num_channels, 1, 1]
else:
stats_shape = [1, 1, num_channels]
if dtype is not None:
if dtype == 'bfloat16':
features = tf.image.convert_image_dtype(features, dtype=tf.bfloat16)
if mean_rgb is not None:
mean_rgb = tf.constant(mean_rgb, shape=stats_shape, dtype=features.dtype)
mean_rgb = tf.broadcast_to(mean_rgb, tf.shape(features))
features = features - mean_rgb
if stddev_rgb is not None:
stddev_rgb = tf.constant(
stddev_rgb, shape=stats_shape, dtype=features.dtype)
stddev_rgb = tf.broadcast_to(stddev_rgb, tf.shape(features))
features = features / stddev_rgb
return features
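# Usage sketch (illustrative): with the module defaults
# MEAN_RGB = STDDEV_RGB = (127.5, 127.5, 127.5), pixel values in [0, 255] are
# mapped to roughly [-1, 1].
image = tf.random.uniform((1, 224, 224, 3), maxval=255.0)
normalized = normalize_images(image)
assert float(tf.reduce_max(tf.abs(normalized))) <= 1.0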
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Customized keras layers used in the EdgeTPU models."""
import inspect
from typing import Any, MutableMapping, Optional, Union, Tuple
import tensorflow as tf
class GroupConv2D(tf.keras.layers.Conv2D):
"""2D group convolution as a Keras Layer."""
def __init__(self,
filters: int,
kernel_size: Union[int, Tuple[int, int]],
groups: int,
strides: Tuple[int, int] = (1, 1),
padding: str = 'valid',
data_format: str = 'channels_last',
dilation_rate: Tuple[int, int] = (1, 1),
activation: Any = None,
use_bias: bool = True,
kernel_initializer: Any = 'glorot_uniform',
bias_initializer: Any = 'zeros',
kernel_regularizer: Any = None,
bias_regularizer: Any = None,
activity_regularizer: Any = None,
kernel_constraint: Any = None,
bias_constraint: Any = None,
batch_norm_layer: Optional[tf.keras.layers.Layer] = None,
bn_epsilon: float = 1e-3,
bn_momentum: float = 0.99,
**kwargs: Any) -> tf.keras.layers.Layer:
"""Creates a 2D group convolution keras layer.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number
of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the height
and width of the 2D convolution window. Can be a single integer to
specify the same value for all spatial dimensions.
groups: The number of input/output channel groups.
strides: An integer or tuple/list of n integers, specifying the stride
length of the convolution. Specifying any stride value != 1 is
incompatible with specifying any `dilation_rate` value != 1.
padding: one of `"valid"` or `"same"` (case-insensitive).
data_format: The ordering of the dimensions in the inputs. `channels_last`
corresponds to inputs with shape `(batch_size, height, width, channels)`
dilation_rate: an integer or tuple/list of 2 integers, specifying the
dilation rate to use for dilated convolution. Can be a single integer to
specify the same value for all spatial dimensions. Currently, specifying
any `dilation_rate` value != 1 is incompatible with specifying any
stride value != 1.
activation: Activation function to use. If you don't specify anything, no
activation is applied ( see `keras.activations`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix ( see
`keras.initializers`).
bias_initializer: Initializer for the bias vector ( see
`keras.initializers`).
kernel_regularizer: Regularizer function applied to the `kernel` weights
matrix (see `keras.regularizers`).
bias_regularizer: Regularizer function applied to the bias vector ( see
`keras.regularizers`).
activity_regularizer: Regularizer function applied to the output of the
layer (its "activation") ( see `keras.regularizers`).
kernel_constraint: Constraint function applied to the kernel matrix ( see
`keras.constraints`).
bias_constraint: Constraint function applied to the bias vector ( see
`keras.constraints`).
batch_norm_layer: The batch normalization layer to use. This is typically
tf.keras.layer.BatchNormalization or a derived class.
bn_epsilon: Batch normalization epsilon.
bn_momentum: Momentum used for moving average in batch normalization.
**kwargs: Additional keyword arguments.
Input shape:
4D tensor with shape: `(batch_size, rows, cols, channels)`
Output shape:
4D tensor with shape: `(batch_size, new_rows, new_cols, filters)` `rows`
and `cols` values might have changed due to padding.
Returns:
A tensor of rank 4 representing
`activation(GroupConv2D(inputs, kernel) + bias)`.
Raises:
      ValueError: if `groups` <= 1 or `groups` >= `filters`.
ValueError: if data_format is not "channels_last".
ValueError: if `padding` is not `same` or `valid`.
ValueError: if `batch_norm_layer` is not a callable when provided.
ValueError: when both `strides` > 1 and `dilation_rate` > 1.
"""
if groups <= 1 or groups >= filters:
raise ValueError('Number of groups should be greater than 1 and less '
'than the output filters.')
self._groups = groups
if data_format != 'channels_last':
raise ValueError(
'GroupConv2D expects input to be in channels_last format.')
if padding.lower() not in ('same', 'valid'):
      raise ValueError('Valid padding options are: same, or valid.')
self.use_batch_norm = False
if batch_norm_layer is not None:
if not inspect.isclass(batch_norm_layer):
raise ValueError('batch_norm_layer is not a class.')
self.use_batch_norm = True
self.bn_epsilon = bn_epsilon
self.bn_momentum = bn_momentum
self.batch_norm_layer = []
if self.use_batch_norm:
self.batch_norm_layer = [
batch_norm_layer(
axis=-1, momentum=self.bn_momentum, epsilon=self.bn_epsilon)
for i in range(self._groups)
]
super().__init__(
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
groups=1,
**kwargs) # pytype: disable=bad-return-type # typed-keras
def build(self, input_shape: Tuple[int, ...]) -> None:
"""Builds GroupConv2D layer as a collection of smaller Conv2D layers."""
input_shape = tf.TensorShape(input_shape)
input_channel = self._get_input_channel(input_shape)
if input_channel % self._groups != 0:
raise ValueError(
f'Number of input channels: {input_channel} are not divisible '
f'by number of groups: {self._groups}.')
self.group_input_channel = int(input_channel / self._groups)
self.group_output_channel = int(self.filters / self._groups)
self.group_kernel_shape = self.kernel_size + (self.group_input_channel,
self.group_output_channel)
self.kernel = []
self.bias = []
for g in range(self._groups):
self.kernel.append(
self.add_weight(
name='kernel_{}'.format(g),
shape=self.group_kernel_shape,
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
trainable=True,
dtype=self.dtype))
if self.use_bias:
self.bias.append(
self.add_weight(
name='bias_{}'.format(g),
shape=(self.group_output_channel,),
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
trainable=True,
dtype=self.dtype))
channel_axis = self._get_channel_axis()
self.input_spec = tf.keras.layers.InputSpec(
ndim=self.rank + 2, axes={channel_axis: input_channel})
self._build_conv_op_data_shape = input_shape[-(self.rank + 1):]
self._build_input_channel = input_channel
self._padding_op = self._get_padding_op()
# channels_last corresponds to 'NHWC' data format.
self._conv_op_data_format = 'NHWC'
self.bn_layers = []
if self.use_batch_norm:
for group_index in range(self._groups):
self.bn_layers.append(self.batch_norm_layer[group_index])
self.built = True
def call(self, inputs: Any, training: Optional[bool] = None) -> Any:
"""Performs the GroupConv2D operation on the inputs."""
input_slices = tf.split(inputs, num_or_size_splits=self._groups, axis=-1)
output_slices = []
for i in range(self._groups):
# Apply conv2d to each slice
output_slice = tf.nn.conv2d(
input_slices[i],
self.kernel[i],
strides=self.strides,
padding=self._padding_op,
data_format=self._conv_op_data_format,
dilations=self.dilation_rate)
if self.use_bias:
output_slice = tf.nn.bias_add(
output_slice, self.bias[i], data_format='NHWC')
# Apply batch norm after bias addition.
if self.use_batch_norm:
output_slice = self.bn_layers[i](output_slice, training=training)
if self.activation is not None:
output_slice = self.activation(output_slice)
output_slices.append(output_slice)
# Concat the outputs back along the channel dimension
outputs = tf.concat(output_slices, axis=-1)
return outputs
def get_config(self) -> MutableMapping[str, Any]:
"""Enables serialization for the group convolution layer."""
config = super().get_config()
config['groups'] = self._groups
config['batch_norm_layer'] = self.batch_norm_layer
config['bn_epsilon'] = self.bn_epsilon
config['bn_momentum'] = self.bn_momentum
return config
@classmethod
def from_config(cls, config):
"""Creates a layer from its config.
This method is the reverse of `get_config`, capable of instantiating the
same layer from the config dictionary. It does not handle layer connectivity
(handled by Network), nor weights (handled by `set_weights`).
    Also, since `get_config` returns `batch_norm_layer` as a list, we need to
    convert it back to either None or the batch norm class.
Arguments:
config: A Python dictionary, typically the output of get_config.
Returns:
A layer instance.
"""
if not config['batch_norm_layer']:
config['batch_norm_layer'] = None
else:
config['batch_norm_layer'] = type(config['batch_norm_layer'][0])
return cls(**config)
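# Usage sketch (illustrative): groups=2 splits 8 input channels into two
# groups of 4, runs an independent 3x3 conv (plus batch norm) per group, and
# concatenates the per-group outputs back to the requested 16 filters.
layer = GroupConv2D(
    filters=16,
    kernel_size=3,
    groups=2,
    padding='same',
    batch_norm_layer=tf.keras.layers.BatchNormalization)
outputs = layer(tf.random.uniform((2, 32, 32, 8)))
assert outputs.shape == (2, 32, 32, 16)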
class GroupConv2DKerasModel(tf.keras.Model):
"""2D group convolution as a keras model."""
def __init__(self,
filters: int,
kernel_size: Tuple[int, int],
groups: int,
batch_norm_layer: Optional[tf.keras.layers.Layer] = None,
bn_epsilon: float = 1e-3,
bn_momentum: float = 0.99,
data_format: str = 'channels_last',
padding: str = 'valid',
**kwargs: Any) -> tf.keras.Model:
"""Creates a 2D group convolution layer as a keras model.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number
of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the height
and width of the 2D convolution window. Can be a single integer to
specify the same value for all spatial dimensions.
groups: The number of input/output channel groups.
batch_norm_layer: The batch normalization layer to use. This is typically
tf.keras.layer.BatchNormalization or a derived class.
bn_epsilon: Batch normalization epsilon.
bn_momentum: Momentum used for moving average in batch normalization.
data_format: The ordering of the dimensions in the inputs. `channels_last`
corresponds to inputs with shape `(batch_size, height, width, channels)`
padding: one of `"valid"` or `"same"` (case-insensitive).
**kwargs: Additional keyword arguments passed to the underlying conv
layers.
Raises:
      ValueError: if `groups` <= 1 or `groups` >= `filters`.
ValueError: if `batch_norm_layer` is not a callable when provided.
ValueError: if `data_format` is not channels_last
ValueError: if `padding` is not `same` or `valid`.
"""
super().__init__()
self.conv_layers = []
self.bn_layers = []
per_conv_filter_size = filters / groups
if groups <= 1 or groups >= filters:
raise ValueError('Number of groups should be greater than 1 and less '
'than the output filters.')
self.batch_norm_layer = batch_norm_layer
self.use_batch_norm = False
if self.batch_norm_layer is not None:
if not inspect.isclass(self.batch_norm_layer):
raise ValueError('batch_norm_layer is not a class.')
self.use_batch_norm = True
if 'activation' in kwargs.keys():
self.activation = tf.keras.activations.get(kwargs['activation'])
kwargs.pop('activation')
else:
self.activation = None
if data_format != 'channels_last':
raise ValueError(
'GroupConv2D expects input to be in channels_last format.')
if padding.lower() not in ('same', 'valid'):
      raise ValueError('Valid padding options are: same, or valid.')
self._groups = groups
for _ in range(self._groups):
# Override the activation so that batchnorm can be applied after the conv.
self.conv_layers.append(
tf.keras.layers.Conv2D(per_conv_filter_size, kernel_size, **kwargs))
if self.use_batch_norm:
for _ in range(self._groups):
self.bn_layers.append(
self.batch_norm_layer(
axis=-1, momentum=bn_momentum, epsilon=bn_epsilon)) # pytype: disable=bad-return-type # typed-keras
def call(self, inputs: Any) -> Any:
"""Applies 2d group convolution on the inputs."""
input_shape = inputs.get_shape().as_list()
if input_shape[-1] % self._groups != 0:
raise ValueError(
f'Number of input channels: {input_shape[-1]} are not divisible '
f'by number of groups: {self._groups}.')
input_slices = tf.split(inputs, num_or_size_splits=self._groups, axis=-1)
output_slices = []
for g in range(self._groups):
output_slice = self.conv_layers[g](input_slices[g])
if self.use_batch_norm:
output_slice = self.bn_layers[g](output_slice)
output_slice = self.activation(output_slice)
output_slices.append(output_slice)
outputs = tf.concat(output_slices, axis=-1)
return outputs
def _nnapi_scalar(value, dtype):
  """Resolves "Scalar operand should be constant" at cost of broadcasting."""
return tf.constant(value, dtype=dtype, shape=(1,))
def _fqop(x, min_val=-128, max_val=127):
"""Wraps an op x with fake quant op and given min/max."""
return tf.quantization.fake_quant_with_min_max_args(
x, min=min_val, max=max_val)
def argmax(input_tensor,
axis=-1,
output_type: tf.DType = tf.dtypes.float32,
name: Optional[str] = None,
keepdims: bool = False,
epsilon: Optional[float] = None):
"""Returns the index with the largest value across axes of a tensor.
Approximately tf.compat.v1.argmax, but not equivalent. If arithmetic allows
value to be anomalously close to the maximum, but not equal to it, the
behavior is undefined.
Args:
input_tensor: A Tensor.
axis: A Value. Must be in the range [-rank(input), rank(input)). Describes
which axis of the input Tensor to reduce across. For vectors, use axis =
0.
output_type: An optional tf.DType. Note that default is different from
tflite (int64) to make default behavior compatible with darwinn.
name: Optional name for operations.
keepdims: If true, retains reduced dimensions with length 1.
epsilon: Optional small number which is intended to be always below
quantization threshold, used to distinguish equal and not equal numbers.
Returns:
A Tensor of type output_type.
"""
fqop = _fqop if output_type.is_floating else tf.identity
safe_axis = axis
if safe_axis < 0:
safe_axis = len(input_tensor.shape) + safe_axis
reduction_size = input_tensor.shape[axis]
axis_max = tf.math.reduce_max(input_tensor, axis=axis, keepdims=True)
zero_if_max = tf.subtract(axis_max, input_tensor)
eps = epsilon if epsilon else 1e-6
if input_tensor.dtype.is_floating:
zero_if_max_else_eps = tf.math.minimum(
_nnapi_scalar(eps, input_tensor.dtype), zero_if_max)
zero_if_max_else_one = zero_if_max_else_eps * _nnapi_scalar(
1 / eps, input_tensor.dtype)
elif input_tensor.dtype.is_integer:
zero_if_max_else_one = tf.math.minimum(
_nnapi_scalar(1, input_tensor.dtype), zero_if_max)
else:
raise ValueError('Please specify epsilon for unknown input data type')
# Input type ends here, output type starts here
zero_if_max_else_one = tf.cast(zero_if_max_else_one, dtype=output_type)
zero_if_max_else_one = fqop(zero_if_max_else_one)
one_if_max_else_zero = fqop(
tf.math.subtract(
fqop(_nnapi_scalar(1, output_type)), zero_if_max_else_one))
rev_index = tf.range(reduction_size, 0, -1, dtype=output_type)
for index in range(safe_axis + 1, len(input_tensor.shape)):
rev_index = tf.expand_dims(rev_index, axis=index - safe_axis)
rev_index = fqop(rev_index)
rev_index_if_max_else_zero = fqop(
tf.math.multiply(one_if_max_else_zero, rev_index))
reverse_argmax = fqop(
tf.math.reduce_max(
rev_index_if_max_else_zero, axis=axis, keepdims=keepdims, name=name))
  # The final operation obtains the name of the argmax layer if provided.
return fqop(
tf.math.subtract(
fqop(_nnapi_scalar(reduction_size, output_type)),
reverse_argmax,
name=name))
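# Worked example (illustrative): for [0.1, 0.9, 0.3] the construction above
# yields one_if_max_else_zero = [0, 1, 0], multiplies it by the reversed index
# range [3, 2, 1] to get [0, 2, 0], and returns 3 - max([0, 2, 0]) = 1.0, the
# index of the maximum, using only ops that lower well to EdgeTPU.
assert float(argmax(tf.constant([0.1, 0.9, 0.3]))) == 1.0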
class ArgmaxKerasLayer(tf.keras.layers.Layer):
"""Implements argmax as a keras model."""
def __init__(self,
axis=-1,
name=None,
output_type=tf.dtypes.int32,
**kwargs: Any) -> tf.keras.Model:
"""Implements argmax as a keras model.
Args:
axis: A Value. Must be in the range [-rank(input), rank(input)). Describes
which axis of the input Tensor to reduce across. For vectors, use axis =
0.
name: Optional name for operations.
output_type: An optional tf.DType.
**kwargs: Other arguments passed to model constructor.
Returns:
A Tensor of type output_type.
"""
super().__init__(name=name, **kwargs)
self.axis = axis
self.output_type = output_type # pytype: disable=bad-return-type # typed-keras
def call(self, inputs: Any) -> Any:
"""Applies argmax on the inputs."""
return argmax(
input_tensor=inputs,
axis=self.axis,
output_type=self.output_type,
name=self.name)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for custom_layers."""
import itertools
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import custom_layers
GROUPS = [2, 4]
INPUT_CHANNEL = [8, 16]
OUTPUT_CHANNEL = [8, 16]
USE_BATCH_NORM = [True, False]
ACTIVATION = ['relu', 'linear']
BATCH_NORM_LAYER = tf.keras.layers.BatchNormalization
# 2 functionally identical group conv implementations.
GROUP_CONV_IMPL = {
'layer': custom_layers.GroupConv2D,
'model': custom_layers.GroupConv2DKerasModel
}
def _get_random_inputs(input_shape):
return tf.random.uniform(shape=input_shape)
class GroupConv2DTest(tf.test.TestCase, parameterized.TestCase):
# Test for combinations of groups, input_channel, output_channel, and
# whether to use batch_norm
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL, USE_BATCH_NORM))
def test_construction(self, groups, input_channel, output_channel,
use_batch_norm):
batch_norm_layer = BATCH_NORM_LAYER if use_batch_norm else None
l = custom_layers.GroupConv2D(
output_channel,
3,
groups=groups,
use_bias=True,
batch_norm_layer=batch_norm_layer)
    inputs = _get_random_inputs(input_shape=(1, 4, 4, input_channel))
_ = l(inputs)
# kernel and bias for each group. When using batch norm, 2 additional
# trainable weights per group for batchnorm layers: gamma and beta.
expected_num_trainable_weights = groups * (2 + 2 * use_batch_norm)
self.assertLen(l.trainable_weights, expected_num_trainable_weights)
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL))
def test_kernel_shapes(self, groups, input_channel, output_channel):
l = custom_layers.GroupConv2D(
output_channel, 3, groups=groups, use_bias=False)
_ = l(_get_random_inputs(input_shape=(1, 32, 32, input_channel)))
expected_kernel_shapes = [(3, 3, int(input_channel / groups),
int(output_channel / groups))
for _ in range(groups)]
kernel_shapes = [
l.trainable_weights[i].get_shape()
for i in range(len(l.trainable_weights))
]
self.assertListEqual(kernel_shapes, expected_kernel_shapes)
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL))
def test_output_shapes(self, groups, input_channel, output_channel):
l = custom_layers.GroupConv2D(
output_channel, 3, groups=groups, use_bias=False, padding='same')
outputs = l(_get_random_inputs(input_shape=[2, 32, 32, input_channel]))
self.assertListEqual(outputs.get_shape().as_list(),
[2, 32, 32, output_channel])
@parameterized.parameters(
itertools.product(GROUPS, USE_BATCH_NORM, ACTIVATION))
def test_serialization_deserialization(self, groups, use_batch_norm,
activation):
batch_norm_layer = BATCH_NORM_LAYER if use_batch_norm else None
l = custom_layers.GroupConv2D(
filters=8,
kernel_size=1,
groups=groups,
use_bias=False,
padding='same',
batch_norm_layer=batch_norm_layer,
activation=activation)
config = l.get_config()
# New layer from config
new_l = custom_layers.GroupConv2D.from_config(config)
# Copy the weights too.
l.build(input_shape=(1, 1, 4))
new_l.build(input_shape=(1, 1, 4))
new_l.set_weights(l.get_weights())
inputs = _get_random_inputs((1, 1, 1, 4))
self.assertNotEqual(l, new_l)
self.assertAllEqual(l(inputs), new_l(inputs))
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL, USE_BATCH_NORM,
ACTIVATION))
def test_equivalence(self, groups, input_channel, output_channel,
use_batch_norm, activation):
batch_norm_layer = BATCH_NORM_LAYER if use_batch_norm else None
kwargs = dict(
filters=output_channel,
groups=groups,
kernel_size=1,
use_bias=False,
batch_norm_layer=batch_norm_layer,
activation=activation)
gc_layer = tf.keras.Sequential([custom_layers.GroupConv2D(**kwargs)])
gc_model = custom_layers.GroupConv2DKerasModel(**kwargs)
gc_layer.build(input_shape=(None, 3, 3, input_channel))
gc_model.build(input_shape=(None, 3, 3, input_channel))
inputs = _get_random_inputs((2, 3, 3, input_channel))
gc_layer.set_weights(gc_model.get_weights())
self.assertAllEqual(gc_layer(inputs), gc_model(inputs))
@parameterized.parameters(('layer', 1, 4), ('layer', 4, 4), ('model', 1, 4),
('model', 4, 4))
def test_invalid_groups_raises_value_error(self, gc_type, groups,
output_channel):
with self.assertRaisesRegex(ValueError, r'^(Number of groups)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=output_channel, groups=groups, kernel_size=3)
@parameterized.parameters(('layer', 3, 4), ('layer', 4, 6), ('model', 3, 4),
('model', 4, 6))
def test_non_group_divisible_raises_value_error(self, gc_type, groups,
input_channel):
with self.assertRaisesRegex(ValueError, r'^(Number of input channels)'):
l = GROUP_CONV_IMPL[gc_type](
filters=groups * 4, groups=groups, kernel_size=3)
l.build(input_shape=(4, 4, input_channel))
@parameterized.parameters(('layer'), ('model'))
def test_non_supported_data_format_raises_value_error(self, gc_type):
with self.assertRaisesRegex(ValueError, r'^(.*(channels_last).*)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=4, groups=2, kernel_size=1, data_format='channels_first')
@parameterized.parameters(('layer'), ('model'))
def test_invalid_batch_norm_raises_value_error(self, gc_type):
def my_batch_norm(x):
return x**2
with self.assertRaisesRegex(ValueError, r'^(.*(not a class).*)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=4, groups=2, kernel_size=1, batch_norm_layer=my_batch_norm)
@parameterized.parameters(('layer'), ('model'))
def test_invalid_padding_raises_value_error(self, gc_type):
with self.assertRaisesRegex(ValueError, r'^(.*(same, or valid).*)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=4, groups=2, kernel_size=1, padding='causal')
class ArgmaxTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(([16, 32, 64], tf.dtypes.float32, tf.dtypes.int32),
([255, 19], tf.dtypes.int32, tf.dtypes.int64))
def test_reference_match(self, shape, input_type, output_type):
random_inputs = tf.random.uniform(shape=shape, maxval=10, dtype=input_type)
for axis in range(-len(shape) + 1, len(shape)):
control_output = tf.math.argmax(
random_inputs, axis=axis, output_type=output_type)
test_output = custom_layers.argmax(
random_inputs, axis=axis, output_type=output_type)
self.assertAllEqual(control_output, test_output)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Bi-Directional Feature Pyramid Networks (BiFPN)."""
import functools
import itertools
from typing import Text, Optional
# Import libraries
from absl import logging
import numpy as np
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
def activation_fn(features: tf.Tensor, act_type: Text):
"""Customized non-linear activation type."""
if act_type in ('silu', 'swish'):
return tf.nn.swish(features)
elif act_type == 'swish_native':
return features * tf.sigmoid(features)
elif act_type == 'hswish':
return features * tf.nn.relu6(features + 3) / 6
elif act_type == 'relu':
return tf.nn.relu(features)
elif act_type == 'relu6':
return tf.nn.relu6(features)
else:
raise ValueError('Unsupported act_type {}'.format(act_type))
def build_batch_norm(is_training_bn: bool,
beta_initializer: Text = 'zeros',
gamma_initializer: Text = 'ones',
data_format: Text = 'channels_last',
momentum: float = 0.99,
epsilon: float = 1e-3,
strategy: Optional[Text] = None,
name: Text = 'tpu_batch_normalization'):
"""Builds a batch normalization layer.
Args:
is_training_bn: `bool` for whether the model is training.
beta_initializer: `str`, beta initializer.
gamma_initializer: `str`, gamma initializer.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    momentum: `float`, momentum of batch norm.
epsilon: `float`, small value for numerical stability.
strategy: `str`, whether to use tpu, gpus or other version of batch norm.
name: the name of the batch normalization layer
Returns:
A normalized `Tensor` with the same `data_format`.
"""
axis = 1 if data_format == 'channels_first' else -1
if is_training_bn:
batch_norm_class = common_modules.get_batch_norm(strategy)
else:
batch_norm_class = tf.keras.layers.BatchNormalization
bn_layer = batch_norm_class(
axis=axis,
momentum=momentum,
epsilon=epsilon,
center=True,
scale=True,
beta_initializer=beta_initializer,
gamma_initializer=gamma_initializer,
name=name)
return bn_layer
def bifpn_config(min_level, max_level):
"""A dynamic bifpn config that can adapt to different min/max levels."""
p = {}
# Node id starts from the input features and monotonically increase whenever
# a new node is added. Here is an example for level P3 - P7:
# P7 (4) P7" (12)
# P6 (3) P6' (5) P6" (11)
# P5 (2) P5' (6) P5" (10)
# P4 (1) P4' (7) P4" (9)
# P3 (0) P3" (8)
# So output would be like:
# [
# {'feat_level': 6, 'inputs_offsets': [3, 4]}, # for P6'
# {'feat_level': 5, 'inputs_offsets': [2, 5]}, # for P5'
# {'feat_level': 4, 'inputs_offsets': [1, 6]}, # for P4'
# {'feat_level': 3, 'inputs_offsets': [0, 7]}, # for P3"
# {'feat_level': 4, 'inputs_offsets': [1, 7, 8]}, # for P4"
# {'feat_level': 5, 'inputs_offsets': [2, 6, 9]}, # for P5"
# {'feat_level': 6, 'inputs_offsets': [3, 5, 10]}, # for P6"
# {'feat_level': 7, 'inputs_offsets': [4, 11]}, # for P7"
# ]
num_levels = max_level - min_level + 1
node_ids = {min_level + i: [i] for i in range(num_levels)}
level_last_id = lambda level: node_ids[level][-1]
level_all_ids = lambda level: node_ids[level]
id_cnt = itertools.count(num_levels)
p['nodes'] = []
for i in range(max_level - 1, min_level - 1, -1):
# top-down path.
p['nodes'].append({
'feat_level': i,
'inputs_offsets': [level_last_id(i),
level_last_id(i + 1)]
})
node_ids[i].append(next(id_cnt))
for i in range(min_level + 1, max_level + 1):
# bottom-up path.
p['nodes'].append({
'feat_level': i,
'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)]
})
node_ids[i].append(next(id_cnt))
return p
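# Example (illustrative; matches the P3 - P7 diagram above): for min_level=3
# and max_level=7 the config contains eight fusion nodes, the top-down path
# P6'..P3" followed by the bottom-up path P4"..P7".
example_config = bifpn_config(min_level=3, max_level=7)
assert len(example_config['nodes']) == 8
assert example_config['nodes'][0] == {'feat_level': 6, 'inputs_offsets': [3, 4]}
assert example_config['nodes'][-1] == {'feat_level': 7, 'inputs_offsets': [4, 11]}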
def get_conv_op(conv_type):
"""Gets convlution op."""
kernel_size = int(conv_type.split('_')[-1])
if conv_type.startswith('sep'):
conv_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
kernel_size=(kernel_size, kernel_size))
elif conv_type.startswith('conv'):
conv_op = functools.partial(
tf.keras.layers.Conv2D, kernel_size=(kernel_size, kernel_size))
else:
raise ValueError('Unknown conv type: {}'.format(conv_type))
return conv_op
def add_n(nodes):
"""A customized add_n to add up a list of tensors."""
# tf.add_n is not supported by EdgeTPU, while tf.reduce_sum is not supported
# by GPU and runs slow on EdgeTPU because of the 5-dimension op.
with tf.name_scope('add_n'):
new_node = nodes[0]
for n in nodes[1:]:
new_node = new_node + n
return new_node
def resize_nearest_neighbor(data, height_scale, width_scale):
"""Nearest neighbor upsampling implementation."""
with tf.name_scope('nearest_upsampling'):
bs, h, w, c = data.get_shape().as_list()
bs = -1 if bs is None else bs
# Use reshape to quickly upsample the input. The nearest pixel is selected
# implicitly via broadcasting.
data = tf.reshape(data, [bs, h, 1, w, 1, c]) * tf.ones(
[1, 1, height_scale, 1, width_scale, 1], dtype=data.dtype)
return tf.reshape(data, [bs, h * height_scale, w * width_scale, c])
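# Mini-example (illustrative): the reshape-and-broadcast trick duplicates each
# pixel into a scale x scale block, so a 1x2x2x1 map upsampled 2x becomes
# 1x4x4x1.
small = tf.reshape(tf.constant([[1.0, 2.0], [3.0, 4.0]]), [1, 2, 2, 1])
big = resize_nearest_neighbor(small, height_scale=2, width_scale=2)
assert big.shape == (1, 4, 4, 1)
assert float(big[0, 0, 1, 0]) == 1.0  # copied from the top-left source pixel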
def resize(feat,
target_height,
target_width,
strategy,
training=False,
method='bilinear'):
"""Resizes the spitial dimensions."""
dtype = feat.dtype
feat_shape = feat.get_shape()
if method == 'bilinear':
if strategy == 'tpu' and training:
if dtype == tf.bfloat16:
feat = tf.cast(feat, tf.float32)
feat = tf.image.resize(feat, [target_height, target_width])
feat = tf.cast(feat, dtype)
elif feat_shape.is_fully_defined():
# Batch dimension is known. Mimic resize[h,w] with
# resize[h,1]+resize[1,w] to reduce HBM padding.
b, h, w, c = feat_shape.as_list()
feat = tf.reshape(feat, [b, h, 1, -1])
feat = tf.image.resize(feat, [target_height, 1])
feat = tf.reshape(feat, [-1, 1, w, c])
feat = tf.image.resize(feat, [1, target_width])
feat = tf.reshape(feat, [b, target_height, target_width, c])
else:
feat = tf.image.resize(feat, [target_height, target_width])
else:
feat = tf.image.resize(feat, [target_height, target_width])
elif method == 'nearest':
_, h, w, _ = feat_shape.as_list()
if training and target_height % h == 0 and target_width % w == 0:
feat = resize_nearest_neighbor(feat, target_height // h,
target_width // w)
else:
feat = tf.cast(feat, tf.float32)
feat = tf.image.resize(feat, [target_height, target_width],
tf.image.ResizeMethod.NEAREST_NEIGHBOR)
else:
raise ValueError('Upsampling type {} is not supported.'.format(method))
return tf.cast(feat, dtype)
class ResampleFeatureMap(tf.keras.layers.Layer):
"""Resamples feature map for downsampling or upsampling."""
def __init__(self,
feat_level,
target_num_channels,
apply_bn=False,
is_training_bn=None,
conv_after_downsample=False,
strategy=None,
data_format=None,
pooling_type=None,
upsampling_type=None,
name='resample_p0'):
super().__init__(name=name)
self.apply_bn = apply_bn
self.is_training_bn = is_training_bn
self.data_format = data_format
self.target_num_channels = target_num_channels
self.feat_level = feat_level
self.strategy = strategy
self.conv_after_downsample = conv_after_downsample
self.pooling_type = pooling_type or 'max'
self.upsampling_type = upsampling_type or 'nearest'
def _pool2d(self, inputs, height, width, target_height, target_width):
"""Pools the inputs to target height and width."""
height_stride_size = int((height - 1) // target_height + 1)
width_stride_size = int((width - 1) // target_width + 1)
if self.pooling_type == 'max':
return tf.keras.layers.MaxPooling2D(
pool_size=[height_stride_size + 1, width_stride_size + 1],
strides=[height_stride_size, width_stride_size],
padding='SAME',
data_format=self.data_format)(
inputs)
if self.pooling_type == 'avg':
return tf.keras.layers.AveragePooling2D(
pool_size=[height_stride_size + 1, width_stride_size + 1],
strides=[height_stride_size, width_stride_size],
padding='SAME',
data_format=self.data_format)(
inputs)
raise ValueError('Unsupported pooling type {}.'.format(self.pooling_type))
def _upsample2d(self, inputs, target_height, target_width, training):
return resize(inputs, target_height, target_width, self.strategy, training,
self.upsampling_type)
def _maybe_apply_1x1(self, feat, training, num_channels):
"""Applies 1x1 conv to change layer width if necessary."""
target_num_channels = self.target_num_channels
if target_num_channels is None or num_channels != target_num_channels:
feat = self.conv2d(feat)
if self.apply_bn:
feat = self.bn(feat, training=training)
return feat
def build(self, feat_shape):
num_channels = self.target_num_channels or feat_shape[-1]
self.conv2d = tf.keras.layers.Conv2D(
num_channels, (1, 1),
padding='same',
data_format=self.data_format,
name='conv2d')
self.bn = build_batch_norm(
is_training_bn=self.is_training_bn,
data_format=self.data_format,
strategy=self.strategy,
name='bn')
self.built = True
super().build(feat_shape)
def call(self, feat, training, all_feats):
hwc_idx = (2, 3, 1) if self.data_format == 'channels_first' else (1, 2, 3)
height, width, num_channels = [feat.shape.as_list()[i] for i in hwc_idx]
if all_feats:
target_feat_shape = all_feats[self.feat_level].shape.as_list()
target_height, target_width, _ = [target_feat_shape[i] for i in hwc_idx]
else:
# Default to downsampling if all_feats is empty.
target_height, target_width = (height + 1) // 2, (width + 1) // 2
# If conv_after_downsample is True, when downsampling, apply 1x1 after
# downsampling for efficiency.
if height > target_height and width > target_width:
if not self.conv_after_downsample:
feat = self._maybe_apply_1x1(feat, training, num_channels)
feat = self._pool2d(feat, height, width, target_height, target_width)
if self.conv_after_downsample:
feat = self._maybe_apply_1x1(feat, training, num_channels)
elif height <= target_height and width <= target_width:
feat = self._maybe_apply_1x1(feat, training, num_channels)
if height < target_height or width < target_width:
feat = self._upsample2d(feat, target_height, target_width, training)
else:
      raise ValueError(
          'Incompatible resampling: feat shape {}x{} target shape: {}x{}'
          .format(height, width, target_height, target_width))
return feat
class FNode(tf.keras.layers.Layer):
"""A Keras Layer implementing BiFPN Node."""
def __init__(self,
feat_level,
inputs_offsets,
fpn_num_filters,
apply_bn_for_resampling,
is_training_bn,
conv_after_downsample,
conv_bn_act_pattern,
conv_type,
act_type,
strategy,
weight_method,
data_format,
pooling_type,
upsampling_type,
name='fnode'):
super().__init__(name=name)
self.feat_level = feat_level
self.inputs_offsets = inputs_offsets
self.fpn_num_filters = fpn_num_filters
self.apply_bn_for_resampling = apply_bn_for_resampling
self.conv_type = conv_type
self.act_type = act_type
self.is_training_bn = is_training_bn
self.conv_after_downsample = conv_after_downsample
self.strategy = strategy
self.data_format = data_format
self.weight_method = weight_method
self.conv_bn_act_pattern = conv_bn_act_pattern
self.pooling_type = pooling_type
self.upsampling_type = upsampling_type
self.resample_layers = []
self.vars = []
def fuse_features(self, nodes):
"""Fuses features from different resolutions and return a weighted sum.
Args:
nodes: a list of tensorflow features at different levels
Returns:
A tensor denoting the fused feature.
"""
dtype = nodes[0].dtype
if self.weight_method == 'attn':
edge_weights = [tf.cast(var, dtype=dtype) for var in self.vars]
normalized_weights = tf.nn.softmax(tf.stack(edge_weights))
nodes = tf.stack(nodes, axis=-1)
new_node = tf.reduce_sum(nodes * normalized_weights, -1)
elif self.weight_method == 'fastattn':
edge_weights = [
tf.nn.relu(tf.cast(var, dtype=dtype)) for var in self.vars
]
weights_sum = add_n(edge_weights)
nodes = [
nodes[i] * edge_weights[i] / (weights_sum + 0.0001)
for i in range(len(nodes))
]
new_node = add_n(nodes)
elif self.weight_method == 'channel_attn':
edge_weights = [tf.cast(var, dtype=dtype) for var in self.vars]
normalized_weights = tf.nn.softmax(tf.stack(edge_weights, -1), axis=-1)
nodes = tf.stack(nodes, axis=-1)
new_node = tf.reduce_sum(nodes * normalized_weights, -1)
elif self.weight_method == 'channel_fastattn':
edge_weights = [
tf.nn.relu(tf.cast(var, dtype=dtype)) for var in self.vars
]
weights_sum = add_n(edge_weights)
nodes = [
nodes[i] * edge_weights[i] / (weights_sum + 0.0001)
for i in range(len(nodes))
]
new_node = add_n(nodes)
elif self.weight_method == 'sum':
new_node = add_n(nodes)
else:
raise ValueError('unknown weight_method %s' % self.weight_method)
return new_node
def _add_wsm(self, initializer, shape=None):
for i, _ in enumerate(self.inputs_offsets):
name = 'WSM' + ('' if i == 0 else '_' + str(i))
self.vars.append(
self.add_weight(initializer=initializer, name=name, shape=shape))
def build(self, feats_shape):
for i, input_offset in enumerate(self.inputs_offsets):
name = 'resample_{}_{}_{}'.format(i, input_offset, len(feats_shape))
self.resample_layers.append(
ResampleFeatureMap(
self.feat_level,
self.fpn_num_filters,
self.apply_bn_for_resampling,
self.is_training_bn,
self.conv_after_downsample,
strategy=self.strategy,
data_format=self.data_format,
pooling_type=self.pooling_type,
upsampling_type=self.upsampling_type,
name=name))
if self.weight_method == 'attn':
self._add_wsm('ones')
elif self.weight_method == 'fastattn':
self._add_wsm('ones')
elif self.weight_method == 'channel_attn':
num_filters = int(self.fpn_num_filters)
self._add_wsm(tf.ones, num_filters)
elif self.weight_method == 'channel_fastattn':
num_filters = int(self.fpn_num_filters)
self._add_wsm(tf.ones, num_filters)
self.op_after_combine = OpAfterCombine(
self.is_training_bn,
self.conv_bn_act_pattern,
self.conv_type,
self.fpn_num_filters,
self.act_type,
self.data_format,
self.strategy,
name='op_after_combine{}'.format(len(feats_shape)))
self.built = True
super().build(feats_shape)
def call(self, feats, training):
nodes = []
for i, input_offset in enumerate(self.inputs_offsets):
input_node = feats[input_offset]
input_node = self.resample_layers[i](input_node, training, feats)
nodes.append(input_node)
new_node = self.fuse_features(nodes)
new_node = self.op_after_combine(new_node)
return feats + [new_node]
class OpAfterCombine(tf.keras.layers.Layer):
"""Operation after combining input features during feature fusiong."""
def __init__(self,
is_training_bn,
conv_bn_act_pattern,
conv_type,
fpn_num_filters,
act_type,
data_format,
strategy,
name='op_after_combine'):
super().__init__(name=name)
self.conv_bn_act_pattern = conv_bn_act_pattern
self.fpn_num_filters = fpn_num_filters
self.act_type = act_type
self.data_format = data_format
self.strategy = strategy
self.is_training_bn = is_training_bn
self.conv_op = get_conv_op(conv_type)(
filters=fpn_num_filters,
padding='same',
use_bias=not self.conv_bn_act_pattern,
data_format=self.data_format,
name='conv')
self.bn = build_batch_norm(
is_training_bn=self.is_training_bn,
data_format=self.data_format,
strategy=self.strategy,
name='bn')
def call(self, new_node, training):
if not self.conv_bn_act_pattern:
new_node = activation_fn(new_node, self.act_type)
new_node = self.conv_op(new_node)
new_node = self.bn(new_node, training=training)
if self.conv_bn_act_pattern:
new_node = activation_fn(new_node, self.act_type)
return new_node
class FPNCells(tf.keras.layers.Layer):
"""FPN cells."""
def __init__(self,
min_level=3,
max_level=8,
fpn_num_filters=96,
apply_bn_for_resampling=True,
is_training_bn=True,
conv_after_downsample=True,
conv_bn_act_pattern=True,
conv_type='sep_3',
act_type='swish',
strategy='tpu',
fpn_weight_method='sum',
data_format='channels_last',
pooling_type='avg',
upsampling_type='bilinear',
fpn_name='bifpn',
fpn_cell_repeats=4,
**kwargs):
super(FPNCells, self).__init__(**kwargs)
self.min_level = min_level
self.max_level = max_level
if fpn_name != 'bifpn':
raise ValueError('Only bifpn config is supported.')
self.fpn_config = bifpn_config(min_level, max_level)
self.cells = [
FPNCell( # pylint: disable=g-complex-comprehension
min_level=min_level,
max_level=max_level,
fpn_num_filters=fpn_num_filters,
apply_bn_for_resampling=apply_bn_for_resampling,
is_training_bn=is_training_bn,
conv_after_downsample=conv_after_downsample,
conv_bn_act_pattern=conv_bn_act_pattern,
conv_type=conv_type,
act_type=act_type,
strategy=strategy,
fpn_weight_method=fpn_weight_method,
data_format=data_format,
pooling_type=pooling_type,
upsampling_type=upsampling_type,
fpn_name=fpn_name,
name='cell_%d' % rep) for rep in range(fpn_cell_repeats)
]
def call(self, feats, training):
"""Model call function."""
for cell in self.cells:
cell_feats = cell(feats, training)
min_level = self.min_level
max_level = self.max_level
feats = []
for level in range(min_level, max_level + 1):
for i, fnode in enumerate(reversed(self.fpn_config['nodes'])):
if fnode['feat_level'] == level:
feats.append(cell_feats[-1 - i])
break
return feats
class FPNCell(tf.keras.layers.Layer):
"""A single FPN cell."""
def __init__(self,
min_level=3,
max_level=7,
fpn_num_filters=80,
apply_bn_for_resampling=True,
is_training_bn=True,
conv_after_downsample=True,
conv_bn_act_pattern=True,
conv_type='sep_3',
act_type='swish',
strategy='tpu',
fpn_weight_method='sum',
data_format='channels_last',
pooling_type='avg',
upsampling_type='bilinear',
fpn_name='bifpn',
name='fpn_cell',
**kwargs):
super(FPNCell, self).__init__(**kwargs)
if fpn_name != 'bifpn':
raise ValueError('Only bifpn config is supported')
self.fpn_config = bifpn_config(min_level, max_level)
self.fnodes = []
for i, fnode_cfg in enumerate(self.fpn_config['nodes']):
logging.info('fnode %d : %s', i, fnode_cfg)
fnode = FNode(
fnode_cfg['feat_level'] - min_level,
fnode_cfg['inputs_offsets'],
fpn_num_filters=fpn_num_filters,
apply_bn_for_resampling=apply_bn_for_resampling,
is_training_bn=is_training_bn,
conv_after_downsample=conv_after_downsample,
conv_bn_act_pattern=conv_bn_act_pattern,
conv_type=conv_type,
act_type=act_type,
strategy=strategy,
weight_method=fpn_weight_method,
data_format=data_format,
pooling_type=pooling_type,
upsampling_type=upsampling_type,
name='fnode%d' % i)
self.fnodes.append(fnode)
def call(self, feats, training):
def _call(feats):
for fnode in self.fnodes:
feats = fnode(feats, training)
return feats
return _call(feats)
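# Usage sketch (illustrative; also runs on CPU, where TpuBatchNormalization
# sees a single shard and skips the cross-replica average). Input features
# must halve in resolution per level, here P3..P8 for a 256x256 input.
fpn = FPNCells(min_level=3, max_level=8, fpn_num_filters=64,
               fpn_cell_repeats=2)
feats = [tf.random.uniform((1, 256 // 2**level, 256 // 2**level, 32))
         for level in range(3, 9)]
outputs = fpn(feats, training=False)
assert len(outputs) == 6
assert outputs[0].shape == (1, 32, 32, 64)  # P3 output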
class SegClassNet(tf.keras.layers.Layer):
"""Segmentation class prediction network."""
def __init__(self,
min_level=3,
max_level=7,
output_filters=256,
apply_bn_for_resampling=True,
is_training_bn=True,
conv_after_downsample=True,
conv_bn_act_pattern=True,
head_conv_type='sep_3',
act_type='swish',
strategy='tpu',
output_weight_method='attn',
data_format='channels_last',
pooling_type='avg',
upsampling_type='bilinear',
fullres_output=False,
fullres_skip_connections=False,
num_classes=32,
name='seg_class_net'):
"""Initialize the SegClassNet.
Args:
min_level: minimum feature level to use in the head.
max_level: maximum feature level to use in the head.
output_filters: output filter size.
    apply_bn_for_resampling: whether to apply batch normalization for
      resampling.
is_training_bn: is training mode.
conv_after_downsample: whether to apply conv after downsample.
conv_bn_act_pattern: conv batch norm activation pattern.
head_conv_type: head convolution type.
act_type: activation type.
    strategy: device strategy, e.g. tpu.
output_weight_method: output weight method.
data_format: data format.
pooling_type: pooling type.
    upsampling_type: upsampling type.
fullres_output: full resolution output.
fullres_skip_connections: full resolution skip connection.
num_classes: number of classes.
name: the name of this layer.
"""
super().__init__(name=name)
conv2d_layer = get_conv_op(head_conv_type)
self.min_level = min_level
self.max_level = max_level
self.fullres_output = fullres_output
    self.fullres_skip_connections = fullres_skip_connections
self.fnode = FNode(
0, # Always use the first level with highest resolution.
list(range(max_level - min_level + 1)),
output_filters,
apply_bn_for_resampling,
is_training_bn,
conv_after_downsample,
conv_bn_act_pattern,
head_conv_type,
act_type,
strategy,
output_weight_method,
data_format,
pooling_type,
upsampling_type,
name='seg_class_fusion')
if fullres_output:
self.fullres_conv_transpose = {}
self.fullres_conv = {}
for i in reversed(range(min_level)):
num_filters = min(num_classes * 2**(i + 1),
output_filters)
self.fullres_conv[str(i)] = conv2d_layer(
filters=num_filters,
data_format=data_format,
kernel_size=3,
strides=1,
padding='same',
activation=act_type,
name='fullres_conv_%d' % i)
self.fullres_conv_transpose[str(i)] = tf.keras.layers.Conv2DTranspose(
filters=num_filters,
data_format=data_format,
kernel_size=3,
strides=2,
padding='same',
activation=act_type,
name='fullres_conv_transpose_%d' % i)
self.classes = conv2d_layer(
num_classes,
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
padding='same',
name='seg-class-predict')
def call(self, inputs, backbone_feats, training):
"""Call SegClassNet."""
seg_output = self.fnode(inputs, training)
net = seg_output[-1]
if self.fullres_output:
for i in reversed(range(self.min_level)):
        if self.fullres_skip_connections:
net = tf.keras.layers.Concatenate()([net, backbone_feats[i + 1]])
net = self.fullres_conv[str(i)](net)
net = self.fullres_conv_transpose[str(i)](net)
class_outputs = self.classes(net)
return class_outputs
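# Shape sketch for the full-resolution path (assuming min_level=3 and
# `channels_last` inputs of size (H, W)): the fused feature enters at stride
# 2**min_level = 8 and each of the min_level stride-2 Conv2DTranspose layers
# doubles the spatial size before `seg-class-predict` runs:
#
#   (H/8, W/8) -> (H/4, W/4) -> (H/2, W/2) -> (H, W)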
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPU image classification models."""
from typing import Any, Dict, Optional, Text
# Import libraries
from absl import logging
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model_blocks
ModelConfig = mobilenet_edgetpu_v1_model_blocks.ModelConfig
MODEL_CONFIGS = {
# (width, depth, resolution, dropout)
'mobilenet_edgetpu': ModelConfig.from_args(1.0, 1.0, 224, 0.1),
'mobilenet_edgetpu_dm1p25': ModelConfig.from_args(1.25, 1.0, 224, 0.1),
'mobilenet_edgetpu_dm1p5': ModelConfig.from_args(1.5, 1.0, 224, 0.1),
'mobilenet_edgetpu_dm1p75': ModelConfig.from_args(1.75, 1.0, 224, 0.1)
}
@tf.keras.utils.register_keras_serializable(package='Vision')
class MobilenetEdgeTPU(tf.keras.Model):
"""Wrapper class for a MobilenetEdgeTPU Keras model.
Contains helper methods to build, manage, and save metadata about the model.
"""
def __init__(self,
config: Optional[ModelConfig] = None,
overrides: Optional[Dict[Text, Any]] = None):
"""Create a MobilenetEdgeTPU model.
Args:
      config: (optional) the main model parameters used to create the model.
      overrides: (optional) a dict containing keys that can override config.
"""
overrides = overrides or {}
config = config or ModelConfig()
self.config = config.replace(**overrides)
input_channels = self.config.input_channels
model_name = self.config.model_name
if isinstance(self.config.resolution, tuple):
input_shape = (self.config.resolution[0], self.config.resolution[1],
input_channels)
else:
input_shape = (self.config.resolution, self.config.resolution,
input_channels)
image_input = tf.keras.layers.Input(shape=input_shape)
output = mobilenet_edgetpu_v1_model_blocks.mobilenet_edgetpu(
image_input, self.config)
if not isinstance(output, dict):
# Cast to float32 in case we have a different model dtype
output = tf.cast(output, tf.float32)
self._output_specs = output.get_shape()
else:
self._output_specs = {
feature: output[feature].get_shape() for feature in output
}
logging.info('Building model %s with params %s',
model_name,
self.config)
super(MobilenetEdgeTPU, self).__init__(
inputs=image_input, outputs=output, name=model_name)
@classmethod
def from_name(cls,
model_name: str,
model_weights_path: Optional[str] = None,
checkpoint_format: Optional[str] = 'tf_checkpoint',
overrides: Optional[Dict[str, Any]] = None):
"""Construct an MobilenetEdgeTPU model from a predefined model name.
E.g., `MobilenetEdgeTPU.from_name('mobilenet_edgetpu')`.
Args:
model_name: the predefined model name
model_weights_path: the path to the weights (h5 file or saved model dir)
checkpoint_format: the model weights format. One of 'tf_checkpoint' or
'keras_checkpoint'.
overrides: (optional) a dict containing keys that can override config
Returns:
      A constructed MobilenetEdgeTPU instance.
"""
model_configs = dict(MODEL_CONFIGS)
overrides = dict(overrides) if overrides else {}
# One can define their own custom models if necessary
model_configs.update(overrides.pop('model_config', {}))
if model_name not in model_configs:
raise ValueError('Unknown model name {}'.format(model_name))
config = model_configs[model_name]
model = cls(config=config, overrides=overrides)
if model_weights_path:
common_modules.load_weights(model,
model_weights_path,
checkpoint_format=checkpoint_format)
return model
@property
def output_specs(self):
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
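# Example usage (a sketch, not part of the library API):
#
#   model = MobilenetEdgeTPU.from_name('mobilenet_edgetpu')
#   images = tf.zeros((1, 224, 224, 3), dtype=tf.float32)
#   probs = model(images, training=False)  # (1, 1001) softmax probabilities
#
# Passing overrides={'num_classes': 10} (or any other ModelConfig field)
# rebuilds the graph with the replaced hyperparameters.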
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPU image classification models."""
import dataclasses
import math
from typing import Any, Optional, Tuple, Union
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.projects.edgetpu.vision.modeling import common_modules
@dataclasses.dataclass
class BlockConfig(base_config.Config):
"""Config for a single MB Conv Block."""
input_filters: int = 0
output_filters: int = 0
kernel_size: int = 3
num_repeat: int = 1
expand_ratio: int = 1
strides: Tuple[int, int] = (1, 1)
se_ratio: Optional[float] = None
id_skip: bool = True
fused_conv: bool = False
conv_type: str = 'depthwise'
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""Default Config for MobilenetEdgeTPU."""
width_coefficient: float = 1.0
depth_coefficient: float = 1.0
resolution: Union[int, Tuple[int, int]] = 224
dropout_rate: float = 0.1
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
# expand_ratio, strides, se_ratio, id_skip, fused_conv, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), conv_type='no_depthwise'),
BlockConfig.from_args(16, 32, 3, 1, 8, (2, 2), fused_conv=True),
BlockConfig.from_args(32, 32, 3, 3, 4, (1, 1), conv_type='no_depthwise'),
BlockConfig.from_args(32, 48, 3, 1, 8, (2, 2), fused_conv=True),
BlockConfig.from_args(48, 48, 3, 3, 4, (1, 1), conv_type='no_depthwise'),
BlockConfig.from_args(48, 96, 3, 1, 8, (2, 2)),
BlockConfig.from_args(96, 96, 3, 3, 4, (1, 1)),
BlockConfig.from_args(96, 96, 3, 1, 8, (1, 1), id_skip=False),
BlockConfig.from_args(96, 96, 3, 3, 4, (1, 1)),
BlockConfig.from_args(96, 160, 5, 1, 8, (2, 2)),
BlockConfig.from_args(160, 160, 5, 3, 4, (1, 1)),
BlockConfig.from_args(160, 192, 3, 1, 8, (1, 1)),
# pylint: enable=bad-whitespace
)
stem_base_filters: int = 32
top_base_filters: int = 1280
activation: str = 'relu'
batch_norm: str = 'default'
bn_momentum: float = 0.99
bn_epsilon: float = 1e-3
# While the original implementation used a weight decay of 1e-5,
# tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
weight_decay: float = 5e-6
drop_connect_rate: float = 0.1
depth_divisor: int = 8
min_depth: Optional[int] = None
# No Squeeze/Excite for MobilenetEdgeTPU
use_se: bool = False
input_channels: int = 3
num_classes: int = 1001
model_name: str = 'mobilenet_edgetpu'
rescale_input: bool = False
data_format: str = 'channels_last'
dtype: str = 'float32'
backbone_only: bool = False
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# Note: this is a truncated normal distribution
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1 / 3.0,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
# TODO(longy): Reuse the utility functions for V1/V2 models.
def round_filters(filters: int,
config: ModelConfig) -> int:
"""Round number of filters based on width coefficient."""
width_coefficient = config.width_coefficient
min_depth = config.min_depth
divisor = config.depth_divisor
orig_filters = filters
if not width_coefficient:
return filters
filters *= width_coefficient
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
logging.info('round_filter input=%s output=%s', orig_filters, new_filters)
return int(new_filters)
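# Worked example: with the default divisor of 8 and width_coefficient=1.25,
# round_filters(32, config) scales 32 to 40.0, rounds via
# max(8, int(40 + 4) // 8 * 8) = 40, and keeps 40 since it is not more than
# 10% below the scaled value. With the defaults, filter counts always land on
# multiples of the depth divisor.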
def round_repeats(repeats: int, depth_coefficient: float) -> int:
"""Round number of repeats based on depth coefficient."""
return int(math.ceil(depth_coefficient * repeats))
def conv2d_block(inputs: tf.Tensor,
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Optional[str] = None):
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
name = name or ''
# Collect args based on what kind of conv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_conv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
}
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER})
x = conv2d(**init_kwargs)(inputs)
if use_batch_norm:
bn_axis = 1 if data_format == 'channels_first' else -1
x = batch_norm(axis=bn_axis,
momentum=bn_momentum,
epsilon=bn_epsilon,
name=name + '_bn')(x)
if activation is not None:
x = tf.keras.layers.Activation(activation,
name=name + '_activation')(x)
return x
def mb_conv_block(inputs: tf.Tensor,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[str] = None):
"""Mobile Inverted Residual Bottleneck.
Args:
inputs: the Keras input to the block
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
Returns:
the output of the block
"""
use_se = config.use_se
activation = tf_utils.get_activation(config.activation)
drop_connect_rate = config.drop_connect_rate
data_format = tf.keras.backend.image_data_format()
use_depthwise = block.conv_type == 'depthwise'
prefix = prefix or ''
filters = block.input_filters * block.expand_ratio
x = inputs
if block.fused_conv:
# If we use fused mbconv, skip expansion and use regular conv.
x = conv2d_block(x,
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
name=prefix + 'fused')
else:
if block.expand_ratio != 1:
# Expansion phase
kernel_size = (1, 1) if use_depthwise else (3, 3)
x = conv2d_block(x,
filters,
config,
kernel_size=kernel_size,
activation=activation,
name=prefix + 'expand')
# Depthwise Convolution
if use_depthwise:
x = conv2d_block(x,
conv_filters=None,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
depthwise=True,
name=prefix + 'depthwise')
# Squeeze and Excitation phase
if use_se:
assert block.se_ratio is not None
assert 0 < block.se_ratio <= 1
num_reduced_filters = max(1, int(
block.input_filters * block.se_ratio
))
if data_format == 'channels_first':
se_shape = (filters, 1, 1)
else:
se_shape = (1, 1, filters)
se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)
se = conv2d_block(se,
num_reduced_filters,
config,
use_bias=True,
use_batch_norm=False,
activation=activation,
name=prefix + 'se_reduce')
se = conv2d_block(se,
filters,
config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
name=prefix + 'se_expand')
x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')
# Output phase
x = conv2d_block(x,
block.output_filters,
config,
activation=None,
name=prefix + 'project')
# Add identity so that quantization-aware training can insert quantization
# ops correctly.
x = tf.keras.layers.Activation('linear', name=prefix + 'id')(x)
if (block.id_skip
and all(s == 1 for s in block.strides)
and block.input_filters == block.output_filters):
if drop_connect_rate and drop_connect_rate > 0:
# Apply dropconnect
# The only difference between dropout and dropconnect in TF is scaling by
# drop_connect_rate during training. See:
# https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
x = tf.keras.layers.Dropout(drop_connect_rate,
noise_shape=(None, 1, 1, 1),
name=prefix + 'drop')(x)
x = tf.keras.layers.add([x, inputs], name=prefix + 'add')
return x
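# A minimal sketch of applying one block functionally (hypothetical values;
# `x` is a feature tensor and `config` a ModelConfig):
#
#   block = BlockConfig.from_args(32, 32, 3, 1, 4, (1, 1))
#   y = mb_conv_block(x, block, config, prefix='stack_0/block_0/')
#
# Since input_filters == output_filters and strides are (1, 1), this block
# takes the dropconnect + residual-add branch above.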
def mobilenet_edgetpu(image_input: tf.keras.layers.Input, config: ModelConfig): # pytype: disable=invalid-annotation # typed-keras
"""Creates a MobilenetEdgeTPU graph given the model parameters.
This function is wrapped by the `MobilenetEdgeTPU` class to make a
tf.keras.Model.
Args:
image_input: the input batch of images
config: the model config
Returns:
    The output of the classification model, or, if only the backbone is
    needed, a dictionary of backbone feature levels.
"""
depth_coefficient = config.depth_coefficient
blocks = config.blocks
stem_base_filters = config.stem_base_filters
top_base_filters = config.top_base_filters
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
data_format = tf.keras.backend.image_data_format()
dtype = config.dtype
weight_decay = config.weight_decay
x = image_input
if data_format == 'channels_first':
# Happens on GPU/TPU if available.
x = tf.keras.layers.Permute((3, 1, 2))(x)
if rescale_input:
x = common_modules.normalize_images(
x, num_channels=input_channels, dtype=dtype, data_format=data_format)
# Build stem
x = conv2d_block(x,
round_filters(stem_base_filters, config),
config,
kernel_size=[3, 3],
strides=[2, 2],
activation=activation,
name='stem')
# Build blocks
num_blocks_total = sum(block.num_repeat for block in blocks)
block_num = 0
backbone_levels = {}
for stack_idx, block in enumerate(blocks):
assert block.num_repeat > 0
# Update block input and output filters based on depth multiplier
block = block.replace(
input_filters=round_filters(block.input_filters, config),
output_filters=round_filters(block.output_filters, config),
num_repeat=round_repeats(block.num_repeat, depth_coefficient))
# The first block needs to take care of stride and filter size increase
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_0/'.format(stack_idx)
x = mb_conv_block(x, block, config, block_prefix)
block_num += 1
if block.num_repeat > 1:
block = block.replace(
input_filters=block.output_filters,
strides=[1, 1]
)
for block_idx in range(block.num_repeat - 1):
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
x = mb_conv_block(x, block, config, prefix=block_prefix)
block_num += 1
backbone_levels[str(stack_idx)] = x
if config.backbone_only:
return backbone_levels
# Build top
x = conv2d_block(x,
round_filters(top_base_filters, config),
config,
activation=activation,
name='top')
# Build classifier
pool_size = (x.shape.as_list()[1], x.shape.as_list()[2])
x = tf.keras.layers.AveragePooling2D(pool_size, name='top_pool')(x)
if dropout_rate and dropout_rate > 0:
x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
x = tf.keras.layers.Conv2D(
num_classes,
1,
kernel_initializer=DENSE_KERNEL_INITIALIZER,
kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
bias_regularizer=tf.keras.regularizers.l2(weight_decay),
name='logits')(
x)
x = tf.keras.layers.Activation('softmax', name='probs')(x)
x = tf.squeeze(x, axis=[1, 2])
return x
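# Drop-connect schedule sketch: the default block table above has 22 repeats
# in total, so with drop_connect_rate=0.1 block b (0-indexed) is built with
# rate 0.1 * b / 22, i.e. 0.0 for the first block and roughly 0.095 for the
# last, scaling stochastic depth linearly with network depth.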
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilenet_edgetpu model."""
import os
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model_blocks
from official.vision.image_classification import preprocessing
# TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu
EXAMPLE_IMAGE = ('third_party/tensorflow_models/official/vision/'
'image_classification/testdata/panda.jpg')
CKPTS = 'gs://**/efficientnets'
class MobilenetEdgeTPUBlocksTest(tf.test.TestCase):
def setUp(self):
    super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def test_bottleneck_block(self):
"""Test for creating a model with bottleneck block arguments."""
images = tf.zeros((4, 224, 224, 3), dtype=tf.float32)
tf.keras.backend.set_image_data_format('channels_last')
blocks = [
mobilenet_edgetpu_v1_model_blocks.BlockConfig.from_args(
input_filters=3,
output_filters=6,
kernel_size=3,
num_repeat=3,
expand_ratio=6,
strides=(2, 2),
fused_conv=False,
)
]
config = mobilenet_edgetpu_v1_model.ModelConfig.from_args(
blocks=blocks,
num_classes=10,
use_se=False,
)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU(config)
outputs = model(images, training=True)
self.assertEqual((4, 10), outputs.shape)
ref_var_names = set([
'stem_conv2d/kernel:0',
'stem_bn/gamma:0',
'stem_bn/beta:0',
'stack_0/block_0/expand_conv2d/kernel:0',
'stack_0/block_0/expand_bn/gamma:0',
'stack_0/block_0/expand_bn/beta:0',
'stack_0/block_0/depthwise_conv2d/depthwise_kernel:0',
'stack_0/block_0/depthwise_bn/gamma:0',
'stack_0/block_0/depthwise_bn/beta:0',
'stack_0/block_0/project_conv2d/kernel:0',
'stack_0/block_0/project_bn/gamma:0',
'stack_0/block_0/project_bn/beta:0',
'stack_0/block_1/expand_conv2d/kernel:0',
'stack_0/block_1/expand_bn/gamma:0',
'stack_0/block_1/expand_bn/beta:0',
'stack_0/block_1/depthwise_conv2d/depthwise_kernel:0',
'stack_0/block_1/depthwise_bn/gamma:0',
'stack_0/block_1/depthwise_bn/beta:0',
'stack_0/block_1/project_conv2d/kernel:0',
'stack_0/block_1/project_bn/gamma:0',
'stack_0/block_1/project_bn/beta:0',
'stack_0/block_2/expand_conv2d/kernel:0',
'stack_0/block_2/expand_bn/gamma:0',
'stack_0/block_2/expand_bn/beta:0',
'stack_0/block_2/depthwise_conv2d/depthwise_kernel:0',
'stack_0/block_2/depthwise_bn/gamma:0',
'stack_0/block_2/depthwise_bn/beta:0',
'stack_0/block_2/project_conv2d/kernel:0',
'stack_0/block_2/project_bn/gamma:0',
'stack_0/block_2/project_bn/beta:0',
'top_conv2d/kernel:0',
'top_bn/gamma:0',
'top_bn/beta:0',
'logits/kernel:0',
'logits/bias:0'
])
var_names = set([var.name for var in model.trainable_variables])
self.assertEqual(var_names, ref_var_names)
def test_fused_bottleneck_block(self):
"""Test for creating a model with fused bottleneck block arguments."""
images = tf.zeros((4, 224, 224, 3), dtype=tf.float32)
tf.keras.backend.set_image_data_format('channels_last')
blocks = [
mobilenet_edgetpu_v1_model_blocks.BlockConfig.from_args(
input_filters=3,
output_filters=6,
kernel_size=3,
num_repeat=3,
expand_ratio=6,
strides=(2, 2),
fused_conv=True,
)
]
config = mobilenet_edgetpu_v1_model.ModelConfig.from_args(
blocks=blocks,
num_classes=10,
use_se=False,
)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU(config)
outputs = model(images, training=True)
self.assertEqual((4, 10), outputs.shape)
var_names = {var.name for var in model.trainable_variables}
ref_var_names = [
'stack_0/block_0/fused_conv2d/kernel:0',
'stack_0/block_1/fused_conv2d/kernel:0',
'stack_0/block_2/fused_conv2d/kernel:0',
]
for ref_var_name in ref_var_names:
self.assertIn(ref_var_name, var_names)
def test_variables(self):
"""Test for variables in blocks to be included in `model.variables`."""
images = tf.zeros((4, 224, 224, 3), dtype=tf.float32)
tf.keras.backend.set_image_data_format('channels_last')
blocks = [
mobilenet_edgetpu_v1_model_blocks.BlockConfig.from_args(
input_filters=3,
output_filters=6,
kernel_size=3,
num_repeat=3,
expand_ratio=6,
id_skip=False,
strides=(2, 2),
se_ratio=0.8,
fused_conv=False,
)
]
config = mobilenet_edgetpu_v1_model.ModelConfig.from_args(
blocks=blocks,
num_classes=10,
use_se=True,
)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU(config)
_ = model(images, training=True)
var_names = {var.name for var in model.variables}
self.assertIn('stack_0/block_0/depthwise_conv2d/depthwise_kernel:0',
var_names)
class MobilenetEdgeTPUBuildTest(tf.test.TestCase):
def setUp(self):
    super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def test_create_mobilenet_edgetpu(self):
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU()
self.assertEqual(common_modules.count_params(model), 4092713)
class MobilenetEdgeTPUPredictTest(tf.test.TestCase):
def setUp(self):
    super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def _copy_saved_model_to_local(self, model_ckpt):
# Copy saved model to local first for speed
tmp_path = '/tmp/saved_model'
tf.io.gfile.RecursivelyCopyDir(model_ckpt, tmp_path, overwrite=True)
return tmp_path
def _test_prediction(self, model_name, image_size):
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU.from_name(model_name)
# Predict image filled with zeros
images = tf.zeros((4, image_size, image_size, 3), dtype=tf.float32)
pred = model(images, training=False)
self.assertEqual(pred.shape, (4, 1000))
# Predict image with loaded weights
images = preprocessing.load_eval_image(EXAMPLE_IMAGE, image_size)
images = tf.expand_dims(images, axis=0)
model_ckpt = os.path.join(CKPTS, model_name)
model_ckpt = self._copy_saved_model_to_local(model_ckpt)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU.from_name(
model_name, model_weights_path=model_ckpt)
pred = model(images, training=False)
pred = pred[0].numpy()
pred_idx, pred_prob = pred.argmax(), pred.max()
# 388 is 'giant panda' (see labels_map_file)
self.assertEqual(pred_idx, 388)
self.assertGreater(pred_prob, 0.75)
def test_mobilenet_edgetpu_image_shape(self):
self.skipTest(
'TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu'
)
params = dict(input_channels=5, num_classes=20, rescale_input=False)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU.from_name(
'mobilenet_edgetpu', overrides=params)
images = tf.zeros((6, 100, 38, 5), dtype=tf.float32)
pred = model(images, training=False)
self.assertEqual(pred.shape, (6, 20))
def test_mobilenet_edgetpu_predict(self):
self.skipTest(
'TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu'
)
self._test_prediction('mobilenet_edgetpu', 224)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPUV2 image classification models."""
from typing import Any, Mapping, Optional
from absl import logging
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v2_model_blocks
ModelConfig = mobilenet_edgetpu_v2_model_blocks.ModelConfig
MODEL_CONFIGS = {
'mobilenet_edgetpu_v2':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_s(),
'mobilenet_edgetpu_v2_tiny':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_tiny(),
'mobilenet_edgetpu_v2_xs':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_xs(),
'mobilenet_edgetpu_v2_s':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_s(),
'mobilenet_edgetpu_v2_m':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_m(),
'mobilenet_edgetpu_v2_l':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_l(),
'autoseg_edgetpu_backbone_xs':
mobilenet_edgetpu_v2_model_blocks.autoseg_edgetpu_backbone_xs(),
'autoseg_edgetpu_backbone_s':
mobilenet_edgetpu_v2_model_blocks.autoseg_edgetpu_backbone_s(),
'autoseg_edgetpu_backbone_m':
mobilenet_edgetpu_v2_model_blocks.autoseg_edgetpu_backbone_m(),
}
@tf.keras.utils.register_keras_serializable(package='Vision')
class MobilenetEdgeTPUV2(tf.keras.Model):
"""Wrapper class for a MobilenetEdgeTPUV2 Keras model.
Contains helper methods to build, manage, and save metadata about the model.
"""
def __init__(self,
model_config_name: Optional[str] = None,
overrides: Optional[Mapping[str, Any]] = None,
**kwargs):
"""Creates a MobilenetEdgeTPUV2 model.
Args:
      model_config_name: (optional) the name of a predefined model config used
        to create the model.
overrides: (optional) a dict containing keys that can override config.
**kwargs: All the rest model arguments in a dictionary.
"""
self.model_config_name = model_config_name
self._self_setattr_tracking = False
self.overrides = overrides or {}
if model_config_name is None:
model_config = ModelConfig()
else:
if model_config_name not in MODEL_CONFIGS:
supported_model_list = list(MODEL_CONFIGS.keys())
        raise ValueError(f'Unknown model name {model_config_name}. Only '
                         f'supports model configs in {supported_model_list}.')
model_config = MODEL_CONFIGS[model_config_name]
self.config = model_config.replace(**self.overrides)
input_channels = self.config.input_channels
model_name = self.config.model_name
if isinstance(self.config.resolution, tuple):
input_shape = (self.config.resolution[0], self.config.resolution[1],
input_channels)
else:
input_shape = (self.config.resolution, self.config.resolution,
input_channels)
image_input = tf.keras.layers.Input(shape=input_shape)
output = mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2(
image_input, self.config)
if not isinstance(output, list):
# Cast to float32 in case we have a different model dtype
output = tf.cast(output, tf.float32)
self._output_specs = output.get_shape()
else:
if self.config.features_as_dict:
# Dict output is required for the decoder ASPP module.
self._output_specs = {
str(i): output[i].get_shape() for i in range(len(output))
}
output = {str(i): output[i] for i in range(len(output))}
else:
# edgetpu/tasks/segmentation assumes features as list.
self._output_specs = [feat.get_shape() for feat in output]
logging.info('Building model %s with params %s',
model_name,
self.config)
super(MobilenetEdgeTPUV2, self).__init__(
inputs=image_input, outputs=output, **kwargs)
self._self_setattr_tracking = True
@classmethod
def from_name(cls,
model_name: str,
model_weights_path: Optional[str] = None,
checkpoint_format: Optional[str] = 'tf_checkpoint',
overrides: Optional[Mapping[str, Any]] = None):
"""Constructs an MobilenetEdgeTPUV2 model from a predefined model name.
E.g., `MobilenetEdgeTPUV2.from_name('mobilenet_edgetpu_v2_s')`.
Args:
model_name: the predefined model name
model_weights_path: the path to the weights (h5 file or saved model dir)
checkpoint_format: the model weights format. One of 'tf_checkpoint' or
'keras_checkpoint'.
overrides: (optional) a dict containing keys that can override config
Returns:
      A constructed MobilenetEdgeTPUV2 instance.
"""
overrides = dict(overrides) if overrides else {}
# One can define their own custom models if necessary
MODEL_CONFIGS.update(overrides.pop('model_config', {}))
model = cls(model_config_name=model_name, overrides=overrides)
if model_weights_path:
common_modules.load_weights(model,
model_weights_path,
checkpoint_format=checkpoint_format)
return model
def get_config(self):
config = {'model_config_name': self.model_config_name,
'overrides': self.overrides}
keras_model_config = super().get_config()
return dict(list(config.items()) + list(keras_model_config.items()))
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(model_config_name=config['model_config_name'],
overrides=config['overrides'])
@property
def output_specs(self):
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
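# Example usage (a sketch): building the small variant as a segmentation
# backbone that returns intermediate features instead of logits.
#
#   model = MobilenetEdgeTPUV2.from_name(
#       'mobilenet_edgetpu_v2_s',
#       overrides={'backbone_only': True, 'features_as_dict': True})
#   feats = model(tf.zeros((1, 224, 224, 3)), training=False)
#
# Here `feats` is a dict keyed by backbone level ('0', '1', ...), matching
# `output_specs` above.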
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPUV2 model's building blocks."""
import dataclasses
import math
from typing import Any, Dict, List, Optional, Tuple, Union
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import oneof
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import custom_layers
@dataclasses.dataclass
class BlockType(oneof.OneOfConfig):
"""Block OP types representing IBN version."""
type: str = 'ibn_dw'
skip: str = 'skip'
ibn_dw: str = 'ibn_dw'
ibn_fused: str = 'ibn_fused'
ibn_grouped: str = 'ibn_grouped'
ibn_fused_grouped: str = 'ibn_fused_grouped'
@dataclasses.dataclass
class BlockSearchConfig(base_config.Config):
"""Config for searchable BlockConfig parameters."""
op_type: BlockType = BlockType()
kernel_size: Optional[int] = None
expand_ratio: Optional[int] = None
stride: Optional[int] = None
group_size: Optional[int] = None
@dataclasses.dataclass
class BlockConfig(base_config.Config):
"""Full config for a single MB Conv Block."""
input_filters: int = 0
output_filters: int = 0
kernel_size: int = 3
num_repeat: int = 1
expand_ratio: int = 1
strides: Tuple[int, int] = (1, 1)
se_ratio: Optional[float] = None
id_skip: bool = True
fused_expand: bool = False
fused_project: bool = False
conv_type: str = 'depthwise'
group_size: Optional[int] = None
@classmethod
def from_search_config(cls,
input_filters: int,
output_filters: int,
block_search_config: BlockSearchConfig,
num_repeat: int = 1,
se_ratio: Optional[float] = None,
id_skip: bool = True) -> 'BlockConfig':
"""Creates BlockConfig from the given parameters."""
block_op_type = block_search_config.op_type
if block_op_type.type == BlockType.skip:
raise ValueError('Received skip type within block creation.')
elif block_op_type.type == BlockType.ibn_dw:
fused_expand = False
fused_project = False
conv_type = 'depthwise'
elif block_op_type.type == BlockType.ibn_fused:
fused_expand = True
fused_project = False
conv_type = 'full'
elif block_op_type.type == BlockType.ibn_fused_grouped:
fused_expand = True
fused_project = False
conv_type = 'group'
elif block_op_type.type == BlockType.ibn_grouped:
fused_expand = False
fused_project = False
conv_type = 'group'
else:
raise NotImplementedError(f'Unsupported IBN type {block_op_type.type}.')
return cls.from_args(
input_filters=input_filters,
output_filters=output_filters,
kernel_size=block_search_config.kernel_size,
num_repeat=num_repeat,
expand_ratio=block_search_config.expand_ratio,
strides=(block_search_config.stride, block_search_config.stride),
se_ratio=se_ratio,
id_skip=id_skip,
fused_expand=fused_expand,
fused_project=fused_project,
conv_type=conv_type,
group_size=block_search_config.group_size)
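# Worked example: an 'ibn_fused' search entry with kernel size 3, expand
# ratio 8, and stride 2 maps to a fused-expansion full conv block:
#
#   BlockConfig.from_search_config(
#       input_filters=24, output_filters=48,
#       block_search_config=BlockSearchConfig.from_args(
#           BlockType.from_args('ibn_fused'), 3, 8, 2))
#
# yields fused_expand=True, conv_type='full', and strides=(2, 2).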
@dataclasses.dataclass
class BlockGroupConfig(base_config.Config):
"""Config for group of blocks that share the same filter size."""
blocks: List[BlockSearchConfig] = dataclasses.field(default_factory=list)
filters: int = 64
def _default_mobilenet_edgetpu_v2_topology():
return [
# Block Group 0
BlockGroupConfig(
blocks=[
# BlockSearchConfig: op_type, kernel_size, expand_ratio, stride
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 1, 1),
],
filters=24),
# Block Group 1
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused_grouped'), 3, 4, 1),
],
filters=48),
# Block Group 2
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused_grouped'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused_grouped'), 3, 4, 1),
],
filters=64),
# Block Group 3
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
],
filters=128),
# Block Group 4
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 8, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
],
filters=160),
# Block Group 5
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
],
filters=192),
# Block Group 6
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 8, 1),
],
filters=256),
]
@dataclasses.dataclass
class TopologyConfig(base_config.Config):
"""Config for model topology as a collection of BlockGroupConfigs."""
block_groups: List[BlockGroupConfig] = dataclasses.field(
default_factory=_default_mobilenet_edgetpu_v2_topology)
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""Default Config for MobilenetEdgeTPUV2."""
width_coefficient: float = 1.0
depth_coefficient: float = 1.0
resolution: Union[int, Tuple[int, int]] = 224
dropout_rate: float = 0.1
stem_base_filters: int = 64
stem_kernel_size: int = 5
top_base_filters: int = 1280
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(
stem_base_filters, 24, 3, 1, 1, (1, 1), conv_type='full'),
BlockConfig.from_args(
24, 48, 3, 1, 8, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(
48, 48, 3, 1, 4, (1, 1), fused_expand=True, conv_type='group'),
BlockConfig.from_args(
48, 64, 3, 1, 8, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(
64, 64, 3, 1, 4, (1, 1), fused_expand=True, conv_type='group'),
BlockConfig.from_args(
64, 64, 3, 1, 4, (1, 1), fused_expand=True, conv_type='full'),
BlockConfig.from_args(
64, 64, 3, 1, 4, (1, 1), fused_expand=True, conv_type='group'),
BlockConfig.from_args(
64, 128, 3, 1, 8, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(128, 128, 3, 3, 4, (1, 1)),
BlockConfig.from_args(128, 160, 3, 1, 8, (1, 1)),
BlockConfig.from_args(160, 160, 3, 3, 4, (1, 1)),
BlockConfig.from_args(160, 192, 5, 1, 8, (2, 2)),
BlockConfig.from_args(192, 192, 5, 3, 4, (1, 1)),
BlockConfig.from_args(192, 256, 5, 1, 8, (1, 1)),
# pylint: enable=bad-whitespace
)
activation: str = 'relu'
batch_norm: str = 'default'
bn_momentum: float = 0.99
bn_epsilon: float = 1e-3
# While the original implementation used a weight decay of 1e-5,
# tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
weight_decay: float = 5e-6
drop_connect_rate: float = 0.1
depth_divisor: int = 8
min_depth: Optional[int] = None
# No Squeeze/Excite for MobilenetEdgeTPUV2
use_se: bool = False
input_channels: int = 3
num_classes: int = 1001
model_name: str = 'mobilenet_edgetpu_v2'
rescale_input: bool = False
data_format: str = 'channels_last'
dtype: str = 'float32'
# The number of filters in each group. HW arch dependent.
group_base_size: int = 64
backbone_only: bool = False
features_as_dict: bool = False
def mobilenet_edgetpu_v2_base(
width_coefficient: float = 1.0,
depth_coefficient: float = 1.0,
stem_base_filters: int = 64,
stem_kernel_size: int = 5,
top_base_filters: int = 1280,
group_base_size: int = 64,
dropout_rate: float = 0.2,
drop_connect_rate: float = 0.1,
filter_size_overrides: Optional[Dict[int, int]] = None,
block_op_overrides: Optional[Dict[int, Dict[int, Dict[str, Any]]]] = None,
block_group_overrides: Optional[Dict[int, Dict[str, Any]]] = None):
"""Creates MobilenetEdgeTPUV2 ModelConfig based on tuning parameters."""
config = ModelConfig()
param_overrides = {
'width_coefficient': width_coefficient,
'depth_coefficient': depth_coefficient,
'stem_base_filters': stem_base_filters,
'stem_kernel_size': stem_kernel_size,
'top_base_filters': top_base_filters,
'group_base_size': group_base_size,
'dropout_rate': dropout_rate,
'drop_connect_rate': drop_connect_rate
}
config = config.replace(**param_overrides)
topology_config = TopologyConfig()
if filter_size_overrides:
for group_id in filter_size_overrides:
topology_config.block_groups[group_id].filters = filter_size_overrides[
group_id]
if block_op_overrides:
for group_id in block_op_overrides:
for block_id in block_op_overrides[group_id]:
replaced_block = topology_config.block_groups[group_id].blocks[
block_id].replace(**block_op_overrides[group_id][block_id])
topology_config.block_groups[group_id].blocks[block_id] = replaced_block
if block_group_overrides:
for group_id in block_group_overrides:
replaced_group = topology_config.block_groups[group_id].replace(
**block_group_overrides[group_id])
topology_config.block_groups[group_id] = replaced_group
blocks = ()
input_filters = stem_base_filters
for group in topology_config.block_groups:
for block_search in group.blocks:
      if block_search.op_type.type != BlockType.skip:
block = BlockConfig.from_search_config(
input_filters=input_filters,
output_filters=group.filters,
block_search_config=block_search)
blocks += (block,)
# Set input filters for the next block
input_filters = group.filters
config = config.replace(blocks=blocks)
return config
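# Override sketch: the model variants below pass dicts keyed by block-group
# index. For instance, filter_size_overrides={0: 16, 1: 32} shrinks the first
# two groups of the default topology, while a block_op_overrides entry such as
# {2: {0: {'op_type': BlockType.from_args('ibn_fused_grouped')}}} swaps the op
# type of a single block, as mobilenet_edgetpu_v2_tiny does further below.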
def autoseg_edgetpu_backbone_base(
width_coefficient: float = 1.0,
depth_coefficient: float = 1.0,
stem_base_filters: int = 64,
stem_kernel_size: int = 5,
top_base_filters: int = 1280,
group_base_size: int = 64,
dropout_rate: float = 0.2,
drop_connect_rate: float = 0.1,
blocks_overrides: Optional[Tuple[BlockConfig, ...]] = None):
"""Creates a edgetpu ModelConfig based on search on segmentation."""
config = ModelConfig()
config.depth_divisor = 4
param_overrides = {
'width_coefficient': width_coefficient,
'depth_coefficient': depth_coefficient,
'stem_base_filters': stem_base_filters,
'stem_kernel_size': stem_kernel_size,
'top_base_filters': top_base_filters,
'group_base_size': group_base_size,
'dropout_rate': dropout_rate,
'drop_connect_rate': drop_connect_rate,
}
if blocks_overrides:
param_overrides['blocks'] = blocks_overrides
config = config.replace(**param_overrides)
return config
def autoseg_edgetpu_backbone_s() -> ModelConfig:
"""AutoML searched model with 2.5ms target simulated latency."""
stem_base_filters = 32
stem_kernel_size = 3
top_base_filters = 1280
blocks = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(
stem_base_filters,
12,
3,
1,
1, (1, 1),
fused_expand=True,
conv_type='full'),
BlockConfig.from_args(
12, 36, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(36, 18, 5, 1, 3, (1, 1)),
BlockConfig.from_args(
18, 60, 5, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(60, 60, 3, 1, 3, (1, 1)),
BlockConfig.from_args(60, 120, 5, 1, 6, (2, 2)),
BlockConfig.from_args(120, 120, 3, 1, 3, (1, 1)),
BlockConfig.from_args(120, 120, 5, 1, 6, (1, 1)),
BlockConfig.from_args(120, 112, 3, 1, 6, (1, 1)),
BlockConfig.from_args(112, 112, 5, 2, 6, (1, 1)),
BlockConfig.from_args(112, 112, 5, 1, 1, (2, 2), id_skip=False),
BlockConfig.from_args(
112, 192, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 3, (1, 1)),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 192, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 160, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
# pylint: enable=bad-whitespace
)
return autoseg_edgetpu_backbone_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
blocks_overrides=blocks,
dropout_rate=0.2,
drop_connect_rate=0.2)
def autoseg_edgetpu_backbone_xs() -> ModelConfig:
"""AutoML searched model with 2ms target simulated latency."""
stem_base_filters = 32
stem_kernel_size = 3
top_base_filters = 1280
blocks = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(
stem_base_filters,
12,
3,
1,
1, (1, 1),
fused_expand=True,
conv_type='full'),
BlockConfig.from_args(
12, 24, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(24, 24, 3, 1, 3, (1, 1)),
BlockConfig.from_args(
24, 60, 3, 1, 3, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(60, 40, 3, 1, 6, (1, 1)),
BlockConfig.from_args(40, 40, 5, 1, 3, (2, 2)),
BlockConfig.from_args(40, 40, 3, 1, 6, (1, 1)),
BlockConfig.from_args(
40, 120, 3, 1, 6, (1, 1), fused_expand=True, conv_type='full'),
BlockConfig.from_args(120, 168, 3, 1, 6, (1, 1)),
BlockConfig.from_args(168, 84, 5, 1, 6, (1, 1)),
BlockConfig.from_args(84, 84, 5, 1, 3, (1, 1)),
BlockConfig.from_args(84, 84, 5, 1, 1, (2, 2), id_skip=False),
BlockConfig.from_args(
84, 288, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(288, 288, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
288, 96, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 480, 5, 1, 3, (1, 1)),
# pylint: enable=bad-whitespace
)
return autoseg_edgetpu_backbone_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
blocks_overrides=blocks,
dropout_rate=0.2,
drop_connect_rate=0.2)
def autoseg_edgetpu_backbone_m() -> ModelConfig:
"""AutoML searched model with 3ms target simulated latency."""
stem_base_filters = 32
stem_kernel_size = 3
top_base_filters = 1280
blocks = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(stem_base_filters, 16, 5, 1, 1, (1, 1)),
BlockConfig.from_args(
16, 36, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(36, 36, 3, 1, 3, (1, 1)),
BlockConfig.from_args(
36, 60, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(60, 60, 3, 1, 6, (1, 1)),
BlockConfig.from_args(
60, 120, 5, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(120, 120, 5, 1, 6, (1, 1)),
BlockConfig.from_args(
120, 80, 3, 1, 6, (1, 1), fused_expand=True, conv_type='full'),
BlockConfig.from_args(80, 168, 3, 1, 6, (1, 1)),
BlockConfig.from_args(168, 168, 5, 1, 6, (1, 1)),
BlockConfig.from_args(168, 168, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
168, 168, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(168, 168, 3, 1, 1, (2, 2), id_skip=False),
BlockConfig.from_args(
168, 192, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 288, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(288, 288, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
288, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 192, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 320, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
# pylint: enable=bad-whitespace
)
return autoseg_edgetpu_backbone_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
blocks_overrides=blocks,
dropout_rate=0.3,
drop_connect_rate=0.3)
def mobilenet_edgetpu_v2_tiny() -> ModelConfig:
"""MobilenetEdgeTPUV2 tiny model config."""
stem_base_filters = 32
stem_kernel_size = 5
top_base_filters = 1280
filter_sizes = [16, 32, 48, 80, 112, 160, 192]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
block_op_overrides = {
2: {
0: {'op_type': BlockType.from_args('ibn_fused_grouped')},
2: {'op_type': BlockType.from_args('ibn_fused_grouped')},
},
3: {
0: {'op_type': BlockType.from_args('ibn_fused_grouped')},
}
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides,
block_op_overrides=block_op_overrides,
dropout_rate=0.05,
drop_connect_rate=0.05)
def mobilenet_edgetpu_v2_xs() -> ModelConfig:
"""MobilenetEdgeTPUV2 extra small model config."""
stem_base_filters = 32
stem_kernel_size = 5
top_base_filters = 1280
filter_sizes = [16, 32, 48, 96, 144, 160, 192]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides,
dropout_rate=0.05,
drop_connect_rate=0.05)
def mobilenet_edgetpu_v2_s():
"""MobilenetEdgeTPUV2 small model config."""
stem_base_filters = 64
stem_kernel_size = 5
top_base_filters = 1280
filter_sizes = [24, 48, 64, 128, 160, 192, 256]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides)
def mobilenet_edgetpu_v2_m():
"""MobilenetEdgeTPUV2 medium model config."""
stem_base_filters = 64
stem_kernel_size = 5
top_base_filters = 1344
filter_sizes = [32, 64, 80, 160, 192, 240, 320]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides)
def mobilenet_edgetpu_v2_l():
"""MobilenetEdgeTPUV2 large model config."""
stem_base_filters = 64
stem_kernel_size = 7
top_base_filters = 1408
filter_sizes = [32, 64, 96, 192, 240, 256, 384]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
group_base_size = 128
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
group_base_size=group_base_size,
filter_size_overrides=filter_size_overrides)
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# Note: this is a truncated normal distribution
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1 / 3.0,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
def round_filters(filters: int,
config: ModelConfig) -> int:
"""Round number of filters based on width coefficient."""
width_coefficient = config.width_coefficient
min_depth = config.min_depth
divisor = config.depth_divisor
orig_filters = filters
if not width_coefficient:
return filters
filters *= width_coefficient
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
logging.info('round_filter input=%s output=%s', orig_filters, new_filters)
return int(new_filters)
def round_repeats(repeats: int, depth_coefficient: float) -> int:
"""Round number of repeats based on depth coefficient."""
return int(math.ceil(depth_coefficient * repeats))
def groupconv2d_block(conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
group_size: Optional[int] = None,
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
name: Optional[str] = None) -> tf.keras.layers.Layer:
"""2D group convolution with batchnorm and activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
if group_size is None:
group_size = config.group_base_size
name = name or ''
  # Compute the number of groups.
  if conv_filters % group_size != 0:
    raise ValueError(f'Number of filters: {conv_filters} is not divisible by '
                     f'size of the groups: {group_size}')
  groups = conv_filters // group_size
# Collect args based on what kind of groupconv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_groupconv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
'filters': conv_filters,
'groups': groups,
'batch_norm_layer': batch_norm if use_batch_norm else None,
'bn_epsilon': bn_epsilon,
'bn_momentum': bn_momentum,
'activation': activation,
'data_format': data_format,
}
return custom_layers.GroupConv2D(**init_kwargs)
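# Grouping sketch: with the default group_base_size of 64, a 128-filter group
# conv is split into 128 // 64 = 2 groups; conv_filters must divide evenly by
# the group size or the ValueError above is raised.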
def conv2d_block_as_layers(
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Optional[str] = None) -> List[tf.keras.layers.Layer]:
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
name = name or ''
# Collect args based on what kind of conv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_conv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
}
sequential_layers: List[tf.keras.layers.Layer] = []
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER})
sequential_layers.append(conv2d(**init_kwargs))
if use_batch_norm:
bn_axis = 1 if data_format == 'channels_first' else -1
sequential_layers.append(
batch_norm(
axis=bn_axis,
momentum=bn_momentum,
epsilon=bn_epsilon,
name=name + '_bn'))
if activation is not None:
sequential_layers.append(
tf.keras.layers.Activation(activation, name=name + '_activation'))
return sequential_layers
def conv2d_block(inputs: tf.Tensor,
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Optional[str] = None) -> tf.Tensor:
"""Compatibility with third_party/car/deep_nets."""
x = inputs
for layer in conv2d_block_as_layers(conv_filters, config, kernel_size,
strides, use_batch_norm, use_bias,
activation, depthwise, name):
x = layer(x)
return x
# Do not inherit from tf.keras.layers.Layer: doing so breaks weights loading.
class _MbConvBlock:
"""Mobile Inverted Residual Bottleneck composite layer."""
def __call__(self, inputs: tf.Tensor, training=False):
x = inputs
for layer in self.expand_block:
x = layer(x)
if self.squeeze_excitation:
se = x
for layer in self.squeeze_excitation:
se = layer(se)
x = tf.keras.layers.multiply([x, se], name=self.name + 'se_excite')
for layer in self.project_block:
x = layer(x)
if self.has_skip_add:
x = tf.keras.layers.add([x, inputs], name=self.name + 'add')
return x
def __init__(self,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[str] = None):
"""Mobile Inverted Residual Bottleneck.
Args:
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
"""
use_se = config.use_se
activation = tf_utils.get_activation(config.activation)
drop_connect_rate = config.drop_connect_rate
data_format = tf.keras.backend.image_data_format()
use_depthwise = block.conv_type == 'depthwise'
use_groupconv = block.conv_type == 'group'
prefix = prefix or ''
self.name = prefix
filters = block.input_filters * block.expand_ratio
self.expand_block: List[tf.keras.layers.Layer] = []
self.squeeze_excitation: List[tf.keras.layers.Layer] = []
self.project_block: List[tf.keras.layers.Layer] = []
if block.fused_project:
raise NotImplementedError('Fused projection is not supported.')
if block.fused_expand and block.expand_ratio != 1:
# If we use fused mbconv, fuse expansion with the main kernel.
# If conv_type is depthwise we still fuse it to a full conv.
if use_groupconv:
self.expand_block.append(groupconv2d_block(
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
group_size=block.group_size,
activation=activation,
name=prefix + 'fused'))
else:
self.expand_block.extend(conv2d_block_as_layers(
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
name=prefix + 'fused'))
else:
if block.expand_ratio != 1:
# Expansion phase with a pointwise conv
self.expand_block.extend(conv2d_block_as_layers(
filters,
config,
kernel_size=(1, 1),
activation=activation,
name=prefix + 'expand'))
# Main kernel, after the expansion (if applicable, i.e. not fused).
if use_depthwise:
self.expand_block.extend(conv2d_block_as_layers(
conv_filters=filters,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
depthwise=True,
name=prefix + 'depthwise'))
elif use_groupconv:
self.expand_block.append(groupconv2d_block(
conv_filters=filters,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
group_size=block.group_size,
activation=activation,
name=prefix + 'group'))
# Squeeze and Excitation phase
if use_se:
assert block.se_ratio is not None
assert 0 < block.se_ratio <= 1
num_reduced_filters = max(1, int(
block.input_filters * block.se_ratio
))
if data_format == 'channels_first':
se_shape = (filters, 1, 1)
else:
se_shape = (1, 1, filters)
self.squeeze_excitation.append(
tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze'))
self.squeeze_excitation.append(
tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape'))
self.squeeze_excitation.extend(
conv2d_block_as_layers(
num_reduced_filters,
config,
use_bias=True,
use_batch_norm=False,
activation=activation,
name=prefix + 'se_reduce'))
self.squeeze_excitation.extend(
conv2d_block_as_layers(
filters,
config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
name=prefix + 'se_expand'))
# Output phase
self.project_block.extend(
conv2d_block_as_layers(
block.output_filters,
config,
activation=None,
name=prefix + 'project'))
# Add identity so that quantization-aware training can insert quantization
# ops correctly.
self.project_block.append(
tf.keras.layers.Activation('linear', name=prefix + 'id'))
self.has_skip_add = False
if (block.id_skip
and all(s == 1 for s in block.strides)
and block.input_filters == block.output_filters):
self.has_skip_add = True
if drop_connect_rate and drop_connect_rate > 0:
# Apply dropconnect
# The only difference between dropout and dropconnect in TF is scaling
# by drop_connect_rate during training. See:
# https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
self.project_block.append(
tf.keras.layers.Dropout(
drop_connect_rate,
noise_shape=(None, 1, 1, 1),
name=prefix + 'drop'))
def mb_conv_block(inputs: tf.Tensor,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[str] = None) -> tf.Tensor:
"""Mobile Inverted Residual Bottleneck.
Args:
inputs: the Keras input to the block
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
Returns:
the output of the block
"""
return _MbConvBlock(block, config, prefix)(inputs)
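# Usage sketch (illustrative; not part of the original file): apply a single
# MBConv block taken from an existing model configuration. Assumes `cfg` is a
# valid `ModelConfig` whose `blocks` field holds `BlockConfig` entries, as
# consumed by `mobilenet_edgetpu_v2` below:
#
#   block = cfg.blocks[0]
#   inputs = tf.keras.Input(shape=(112, 112, block.input_filters))
#   outputs = mb_conv_block(inputs, block=block, config=cfg,
#                           prefix='stack_0/block_0/')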
def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
config: ModelConfig): # pytype: disable=invalid-annotation # typed-keras
"""Creates a MobilenetEdgeTPUV2 graph given the model parameters.
This function is wrapped by the `MobilenetEdgeTPUV2` class to make a
tf.keras.Model.
Args:
image_input: the input batch of images
config: the model config
Returns:
The output of the classification model, or, if only the backbone is needed,
a list of backbone feature levels.
"""
depth_coefficient = config.depth_coefficient
blocks = config.blocks
stem_base_filters = config.stem_base_filters
stem_kernel_size = config.stem_kernel_size
top_base_filters = config.top_base_filters
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
data_format = tf.keras.backend.image_data_format()
dtype = config.dtype
weight_decay = config.weight_decay
x = image_input
if data_format == 'channels_first':
# Happens on GPU/TPU if available.
x = tf.keras.layers.Permute((3, 1, 2))(x)
if rescale_input:
x = common_modules.normalize_images(
x, num_channels=input_channels, dtype=dtype, data_format=data_format)
# Build stem
x = conv2d_block(
x,
round_filters(stem_base_filters, config),
config,
kernel_size=[stem_kernel_size, stem_kernel_size],
strides=[2, 2],
activation=activation,
name='stem')
# Build blocks
num_blocks_total = sum(block.num_repeat for block in blocks)
block_num = 0
backbone_levels = []
for stack_idx, block in enumerate(blocks):
is_reduction = False
assert block.num_repeat > 0
# Update block input and output filters based on depth multiplier
block = block.replace(
input_filters=round_filters(block.input_filters, config),
output_filters=round_filters(block.output_filters, config),
num_repeat=round_repeats(block.num_repeat, depth_coefficient))
if stack_idx == 0:
backbone_levels.append(x)
elif (stack_idx == len(blocks) - 1) or (blocks[stack_idx + 1].strides
== (2, 2)):
is_reduction = True
# The first block needs to take care of stride and filter size increase
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_0/'.format(stack_idx)
x = _MbConvBlock(block, config, block_prefix)(x)
block_num += 1
if block.num_repeat > 1:
block = block.replace(
input_filters=block.output_filters,
strides=[1, 1]
)
for block_idx in range(block.num_repeat - 1):
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
x = _MbConvBlock(block, config, prefix=block_prefix)(x)
block_num += 1
if is_reduction:
backbone_levels.append(x)
if config.backbone_only:
return backbone_levels
# Build top
x = conv2d_block(x,
round_filters(top_base_filters, config),
config,
activation=activation,
name='top')
# Build classifier
pool_size = (x.shape.as_list()[1], x.shape.as_list()[2])
x = tf.keras.layers.AveragePooling2D(pool_size, name='top_pool')(x)
if dropout_rate and dropout_rate > 0:
x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
x = tf.keras.layers.Conv2D(
    num_classes,
    1,
    kernel_initializer=DENSE_KERNEL_INITIALIZER,
    kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
    bias_regularizer=tf.keras.regularizers.l2(weight_decay),
    name='logits')(x)
x = tf.keras.layers.Activation('softmax', name='probs')(x)
x = tf.squeeze(x, axis=[1, 2])
return x
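# Usage sketch (illustrative; not part of the original file): this mirrors how
# the `MobilenetEdgeTPUV2` class wraps the graph-building function into a
# tf.keras.Model. Assumes `cfg` is a valid `ModelConfig`:
#
#   image_input = tf.keras.Input(shape=(224, 224, 3))
#   output = mobilenet_edgetpu_v2(image_input, cfg)
#   model = tf.keras.Model(inputs=image_input, outputs=output)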
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilenet_edgetpu model."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v2_model
class MobilenetEdgeTPUV2BuildTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def test_create_mobilenet_edgetpu(self):
model = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2()
self.assertEqual(common_modules.count_params(model), 6069657)
def test_export_tflite(self):
model = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2()
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tmp_dir = self.create_tempdir()
output_tflite = os.path.join(tmp_dir, 'model_quant.tflite')
tflite_buffer = converter.convert()
tf.io.gfile.GFile(output_tflite, 'wb').write(tflite_buffer)
self.assertTrue(tf.io.gfile.exists(output_tflite))
def test_model_save_load(self):
"""Serializes and de-serializeds the model."""
model_builder = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2
model = model_builder.from_name(model_name='mobilenet_edgetpu_v2')
# The model always has a conv2d layer right after the input layer; we
# compare this layer's weight parameters between the original model and
# the saved-then-loaded model.
first_conv_layer = model.get_layer('stem_conv2d')
kernel_tensor = first_conv_layer.trainable_weights[0].numpy()
save_path = os.path.join(self.create_tempdir().full_path, 'test_model')
model.save(save_path)
loaded_model = tf.keras.models.load_model(save_path)
loaded_first_conv_layer = loaded_model.get_layer('stem_conv2d')
loaded_kernel_tensor = loaded_first_conv_layer.trainable_weights[0].numpy()
self.assertAllClose(kernel_tensor, loaded_kernel_tensor)
def test_model_initialization_failure(self):
"""Tests model can only be initialized with predefined model name."""
model_builder = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2
with self.assertRaises(ValueError):
_ = model_builder.from_name(model_name='undefined_model_name')
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
r"""Export model (float or quantized tflite, and saved model) from a trained checkpoint.
Example:
To export a dummy quantized model:
export_tflite --model_name=mobilenet_edgetpu_v2_s --output_dir=/tmp --quantize
Using a training checkpoint:
export_tflite --model_name=mobilenet_edgetpu_v2_s \
--ckpt_path=/path/to/training/checkpoint \
--dataset_dir=/path/to/your/dataset --output_dir=/tmp --quantize
Exporting without the final squeeze layer:
export_tflite --model_name=mobilenet_edgetpu_v2_xs \
--output_layer=probs \
--dataset_dir=/path/to/your/dataset --output_dir=/tmp --quantize
"""
# pylint: enable=line-too-long
import os
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.serving import export_util
flags.DEFINE_string('model_name', None,
'Used to build model using experiment config factory.')
flags.DEFINE_string(
'ckpt_path', None, 'Path to the checkpoint. '
'If not provided, a tflite with random parameters is exported.')
flags.DEFINE_enum(
'ckpt_format', 'tf_checkpoint',
['tf_checkpoint', 'keras_checkpoint'],
'tf_checkpoint is for ckpt files from the tf.train.Checkpoint.save() method; '
'keras_checkpoint is for ckpt files from the keras.Model.save_weights() '
'method.')
flags.DEFINE_string('output_dir', None, 'Directory to output exported files.')
flags.DEFINE_integer(
'image_size', 224,
'Size of the input image. Ideally should be the same as the image_size used '
'in training config.')
flags.DEFINE_string(
'output_layer', None,
'Layer name to take the output from. Can be used to take the output from '
'an intermediate layer. None means use the original model output.')
flags.DEFINE_string(
'finalize_method', 'none',
'Additional layers to be added to customize serving output.\n'
'Supported are (none|(argmax|squeeze|resize<?>)[,...]).\n'
'- none: do not add extra serving layers.\n'
'- argmax: adds argmax.\n'
'- squeeze: removes dimensions of size 1 from the shape of a tensor.\n'
'- resize<?> (for example resize512): adds resize bilinear|nn to <?> size.\n'
'For example: --finalize_method=resize128,argmax,resize512,squeeze\n'
'will do resize bilinear to 128x128, then argmax, then resize nn to 512x512.')
# Quantization related parameters
flags.DEFINE_bool(
'quantize', False,
'Quantize model before exporting tflite. Note that only the exported '
'TFLite is quantized not the SavedModel.')
flags.DEFINE_bool('use_experimental_quantizer', True, 'Enables experimental '
'quantizer of TFLiteConverter 2.0.')
flags.DEFINE_bool(
'quantize_less_restrictive', False,
'Allows non int8 based intermediate types, automatic model output type.')
flags.DEFINE_integer(
'num_calibration_steps', 100,
'Number of post-training quantization calibration steps to run.')
flags.DEFINE_string('dataset_name', 'imagenet2012',
'Name of the dataset to use for quantization calibration.')
flags.DEFINE_string('dataset_dir', None, 'Dataset location.')
flags.DEFINE_string(
'dataset_split', 'train',
'The dataset split (train, validation etc.) to use for calibration.')
FLAGS = flags.FLAGS
def get_export_config_from_flags():
"""Creates ExportConfig from cmd line flags."""
quantization_config = export_util.QuantizationConfig(
quantize=FLAGS.quantize,
quantize_less_restrictive=FLAGS.quantize_less_restrictive,
use_experimental_quantizer=FLAGS.use_experimental_quantizer,
num_calibration_steps=FLAGS.num_calibration_steps,
dataset_name=FLAGS.dataset_name,
dataset_dir=FLAGS.dataset_dir,
dataset_split=FLAGS.dataset_split)
export_config = export_util.ExportConfig(
    model_name=FLAGS.model_name,
    ckpt_path=FLAGS.ckpt_path,
    ckpt_format=FLAGS.ckpt_format,
    output_dir=FLAGS.output_dir,
    image_size=FLAGS.image_size,
    output_layer=FLAGS.output_layer,
    finalize_method=FLAGS.finalize_method.lower().split(','),
    quantization_config=quantization_config)
return export_config
def run_export():
"""Exports TFLite with PTQ."""
export_config = get_export_config_from_flags()
model = export_util.build_experiment_model(
experiment_type=export_config.model_name)
if export_config.ckpt_path:
logging.info('Loading checkpoint from %s', export_config.ckpt_path)
common_modules.load_weights(
model,
export_config.ckpt_path,
checkpoint_format=export_config.ckpt_format)
else:
logging.info('No checkpoint provided. Using randomly initialized weights.')
if export_config.output_layer is not None:
all_layer_names = {l.name for l in model.layers}
if export_config.output_layer not in all_layer_names:
model.summary()
logging.info(
    'Cannot find the layer %s in the model. See the above summary to '
    'choose an output layer.', export_config.output_layer)
return
output_layer = model.get_layer(export_config.output_layer)
model = tf.keras.Model(model.input, output_layer.output)
model_input = tf.keras.Input(
shape=(export_config.image_size, export_config.image_size, 3),
batch_size=1)
model_output = export_util.finalize_serving(model(model_input), export_config)
model_for_inference = tf.keras.Model(model_input, model_output)
# Convert to tflite. Quantize if quantization parameters are specified.
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_inference)
export_util.configure_tflite_converter(export_config, converter)
tflite_buffer = converter.convert()
# Make sure the output directory exists and write tflite.
tf.io.gfile.makedirs(export_config.output_dir)
tflite_path = os.path.join(export_config.output_dir,
f'{export_config.model_name}.tflite')
tf.io.gfile.GFile(tflite_path, 'wb').write(tflite_buffer)
print('TfLite model exported to {}'.format(tflite_path))
# Export saved model.
saved_model_path = os.path.join(export_config.output_dir,
export_config.model_name)
model_for_inference.save(saved_model_path)
print('SavedModel exported to {}'.format(saved_model_path))
def main(_):
run_export()
if __name__ == '__main__':
flags.mark_flag_as_required('model_name')
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for export_tflite."""
import itertools
import os
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.serving import export_util
def _build_model(config):
model = export_util.build_experiment_model(config.model_name)
model_input = tf.keras.Input(
shape=(config.image_size, config.image_size, 3), batch_size=1)
model_output = export_util.finalize_serving(model(model_input), config)
model_for_inference = tf.keras.Model(model_input, model_output)
return model_for_inference
def _dump_tflite(model, config):
converter = tf.lite.TFLiteConverter.from_keras_model(model)
export_util.configure_tflite_converter(config, converter)
tflite_buffer = converter.convert()
tf.io.gfile.makedirs(os.path.dirname(config.output_dir))
tflite_path = os.path.join(config.output_dir, f'{config.model_name}.tflite')
tf.io.gfile.GFile(tflite_path, 'wb').write(tflite_buffer)
return tflite_path
SEG_MODELS = [
'autoseg_edgetpu_xs',
]
FINALIZE_METHODS = [
'resize512,argmax,squeeze', 'resize256,argmax,resize512,squeeze',
'resize128,argmax,resize512,squeeze'
]
class ExportTfliteTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
('mobilenet_edgetpu_v2_xs', 224),
('autoseg_edgetpu_xs', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32', 512),
)
def test_model_build_and_export_tflite(self, model_name, image_size):
tmp_dir = self.create_tempdir().full_path
config = export_util.ExportConfig(
model_name=model_name, image_size=image_size, output_dir=tmp_dir)
config.quantization_config.quantize = False
model = _build_model(config)
tflite_path = _dump_tflite(model, config)
self.assertTrue(tf.io.gfile.exists(tflite_path))
@parameterized.parameters(
('mobilenet_edgetpu_v2_xs', 224),
('autoseg_edgetpu_xs', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32', 512),
)
def test_model_build_and_export_saved_model(self, model_name, image_size):
tmp_dir = self.create_tempdir().full_path
config = export_util.ExportConfig(
model_name=model_name, image_size=image_size, output_dir=tmp_dir)
model = _build_model(config)
saved_model_path = os.path.join(config.output_dir, config.model_name)
model.save(saved_model_path)
self.assertTrue(tf.saved_model.contains_saved_model(saved_model_path))
@parameterized.parameters(itertools.product(SEG_MODELS, FINALIZE_METHODS))
def test_segmentation_finalize_methods(self, model_name, finalize_method):
tmp_dir = self.create_tempdir().full_path
config = export_util.ExportConfig(
model_name=model_name,
image_size=512,
output_dir=tmp_dir,
finalize_method=finalize_method.split(','))
config.quantization_config.quantize = False
model = _build_model(config)
model_input = tf.random.normal([1, config.image_size, config.image_size, 3])
self.assertEqual(
model(model_input).get_shape().as_list(),
[1, config.image_size, config.image_size])
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements serving with custom post processing."""
import dataclasses
from typing import List, Optional
import tensorflow as tf
import tensorflow_datasets as tfds
from official.core import exp_factory
from official.core import task_factory
from official.modeling.hyperparams import base_config
# pylint: disable=unused-import
from official.projects.edgetpu.vision.configs import mobilenet_edgetpu_config
from official.projects.edgetpu.vision.configs import semantic_segmentation_config
from official.projects.edgetpu.vision.configs import semantic_segmentation_searched_config
from official.projects.edgetpu.vision.modeling import custom_layers
from official.projects.edgetpu.vision.modeling.backbones import mobilenet_edgetpu
from official.projects.edgetpu.vision.tasks import image_classification
from official.projects.edgetpu.vision.tasks import semantic_segmentation as edgetpu_semantic_segmentation
from official.vision.beta.tasks import semantic_segmentation
# pylint: enable=unused-import
MEAN_RGB = [127.5, 127.5, 127.5]
STDDEV_RGB = [127.5, 127.5, 127.5]
@dataclasses.dataclass
class QuantizationConfig(base_config.Config):
"""Configuration for post training quantization.
Attributes:
quantize: Whether to quantize model before exporting tflite.
quantize_less_restrictive: Allows non int8 based intermediate types,
automatic model output type.
use_experimental_quantizer: Enables experimental quantizer of
TFLiteConverter 2.0.
num_calibration_steps: Number of post-training quantization calibration
steps to run.
dataset_name: Name of the dataset to use for quantization calibration.
dataset_dir: Dataset location.
dataset_split: The dataset split (train, validation etc.) to use for
calibration.
"""
quantize: bool = False
quantize_less_restrictive: bool = False
use_experimental_quantizer: bool = True
dataset_name: Optional[str] = None
dataset_dir: Optional[str] = None
dataset_split: Optional[str] = None
num_calibration_steps: int = 100
@dataclasses.dataclass
class ExportConfig(base_config.Config):
"""Configuration for exporting models as tflite and saved_models.
Attributes:
model_name: One of the registered model names
ckpt_path: Path of the training checkpoint. If not provided, a tflite with
random parameters is exported.
ckpt_format: Format of the checkpoint. tf_checkpoint is for ckpt files from
the tf.train.Checkpoint.save() method; keras_checkpoint is for ckpt files
from the keras.Model.save_weights() method.
output_dir: Directory to output exported files.
image_size: Size of the input image. Ideally should be the same as the
image_size used in training config
output_layer: Layer name to take the output from. Can be used to take the
output from an intermediate layer. None means use the original model
output.
finalize_method: Additional layers to be added to customize serving output.
Supported are (none|(argmax|squeeze|resize<?>)[,...]).
- none: do not add extra serving layers.
- argmax: adds argmax.
- squeeze: removes dimensions (except batch dim) of size 1 from the shape
of a tensor.
- resize<?> (for example resize512): adds resize bilinear|nn to <?> size.
For example: --finalize_method=resize128,argmax,resize512,squeeze will do
resize bilinear to 128x128, then argmax, then resize nn to 512x512.
"""
quantization_config: QuantizationConfig = QuantizationConfig()
model_name: Optional[str] = None
ckpt_path: Optional[str] = None
ckpt_format: Optional[str] = 'tf_checkpoint'
output_dir: str = '/tmp/'
image_size: int = 224
output_layer: Optional[str] = None
finalize_method: Optional[List[str]] = None
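# Construction sketch (illustrative; not part of the original file): a config
# for a quantized segmentation export whose serving output is resized,
# argmax-ed and squeezed. The dataset paths are placeholders:
#
#   export_config = ExportConfig(
#       model_name='autoseg_edgetpu_xs',
#       image_size=512,
#       output_dir='/tmp/export',
#       finalize_method=['resize128', 'argmax', 'resize512', 'squeeze'],
#       quantization_config=QuantizationConfig(
#           quantize=True,
#           dataset_name='imagenet2012',
#           dataset_dir='/path/to/tfds',
#           dataset_split='train'))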
def finalize_serving(model_output, export_config):
"""Adds extra layers based on the provided configuration."""
finalize_method = export_config.finalize_method
output_layer = model_output
if not finalize_method or finalize_method[0] == 'none':
return output_layer
discrete = False
for i in range(len(finalize_method)):
if finalize_method[i] == 'argmax':
discrete = True
is_argmax_last = (i + 1) == len(finalize_method)
if is_argmax_last:
output_layer = tf.argmax(
output_layer, axis=3, output_type=tf.dtypes.int32)
else:
# TODO(tohaspiridonov): add first_match=False when cl/383951533 is submitted
output_layer = custom_layers.argmax(
output_layer, keepdims=True, epsilon=1e-3)
elif finalize_method[i] == 'squeeze':
output_layer = tf.squeeze(output_layer, axis=3)
else:
resize_params = finalize_method[i].split('resize')
if len(resize_params) != 2 or resize_params[0]:
raise ValueError('Cannot finalize with ' + finalize_method[i] + '.')
resize_to_size = int(resize_params[1])
if discrete:
output_layer = tf.image.resize(
output_layer, [resize_to_size, resize_to_size],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
else:
output_layer = tf.image.resize(
output_layer, [resize_to_size, resize_to_size],
method=tf.image.ResizeMethod.BILINEAR)
return output_layer
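# Shape sketch (illustrative; not part of the original file). With
# finalize_method=['resize128', 'argmax', 'resize512', 'squeeze'], a float
# [1, 64, 64, num_classes] output is bilinearly resized to 128x128, reduced to
# per-pixel class ids (argmax is not last here, so the keepdims variant from
# custom_layers is used), resized to 512x512 with nearest neighbor (the tensor
# is now discrete), and finally squeezed to [1, 512, 512]:
#
#   logits = tf.zeros([1, 64, 64, 32])
#   cfg = ExportConfig(finalize_method=['resize128', 'argmax',
#                                       'resize512', 'squeeze'])
#   out = finalize_serving(logits, cfg)  # shape: [1, 512, 512]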
def preprocess_for_quantization(image_data, image_size, crop_padding=32):
"""Crops to center of image with padding then scales, normalizes image_size.
Args:
image_data: A 3D Tensor representing the RGB image data. Image can be of
arbitrary height and width.
image_size: image height/width dimension.
crop_padding: the padding size to use when centering the crop.
Returns:
A decoded and cropped image Tensor. Image is normalized to [-1,1].
"""
shape = tf.shape(image_data)
image_height = shape[0]
image_width = shape[1]
padded_center_crop_size = tf.cast(
(image_size * 1.0 / (image_size + crop_padding)) *
tf.cast(tf.minimum(image_height, image_width), tf.float32), tf.int32)
offset_height = ((image_height - padded_center_crop_size) + 1) // 2
offset_width = ((image_width - padded_center_crop_size) + 1) // 2
image = tf.image.crop_to_bounding_box(
image_data,
offset_height=offset_height,
offset_width=offset_width,
target_height=padded_center_crop_size,
target_width=padded_center_crop_size)
image = tf.image.resize([image], [image_size, image_size],
method=tf.image.ResizeMethod.BILINEAR)[0]
image = tf.cast(image, tf.float32)
image -= tf.constant(MEAN_RGB)
image /= tf.constant(STDDEV_RGB)
return image
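# Usage sketch (illustrative; not part of the original file): preprocess one
# arbitrarily sized RGB image for calibration. Since MEAN_RGB and STDDEV_RGB
# are both 127.5, the result is roughly in [-1, 1]:
#
#   raw = tf.random.uniform([300, 400, 3], maxval=255, dtype=tf.float32)
#   calibrated = preprocess_for_quantization(raw, image_size=224)
#   # calibrated.shape == (224, 224, 3)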
def representative_dataset_gen(export_config):
"""Gets a python generator of numpy arrays for the given dataset."""
quantization_config = export_config.quantization_config
dataset = tfds.builder(
quantization_config.dataset_name,
data_dir=quantization_config.dataset_dir)
dataset.download_and_prepare()
data = dataset.as_dataset()[quantization_config.dataset_split]
iterator = data.as_numpy_iterator()
for _ in range(quantization_config.num_calibration_steps):
features = next(iterator)
image = features['image']
image = preprocess_for_quantization(image, export_config.image_size)
image = tf.reshape(
image, [1, export_config.image_size, export_config.image_size, 3])
yield [image]
def configure_tflite_converter(export_config, converter):
"""Common code for picking up quantization parameters."""
quantization_config = export_config.quantization_config
if quantization_config.quantize:
if quantization_config.dataset_dir is None:
raise ValueError(
'Must provide a representative dataset when quantizing the model.')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
tf.lite.OpsSet.TFLITE_BUILTINS_INT8
]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
if quantization_config.quantize_less_restrictive:
converter.target_spec.supported_ops += [
tf.lite.OpsSet.TFLITE_BUILTINS
]
converter.inference_output_type = tf.float32
def _representative_dataset_gen():
return representative_dataset_gen(export_config)
converter.representative_dataset = _representative_dataset_gen
def build_experiment_model(experiment_type):
"""Builds model from experiment type configuration."""
params = exp_factory.get_exp_config(experiment_type)
params.validate()
params.lock()
task = task_factory.get_task(params.task)
return task.build_model()
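# Wiring sketch (illustrative; not part of the original file): how the helpers
# above combine for a post-training-quantized export, mirroring run_export()
# in export_tflite.py. Assumes `export_config` was built as in the
# construction sketch above:
#
#   model = build_experiment_model('mobilenet_edgetpu_v2_xs')
#   converter = tf.lite.TFLiteConverter.from_keras_model(model)
#   # Sets optimizations, int8 ops, and the representative dataset when
#   # export_config.quantization_config.quantize is True.
#   configure_tflite_converter(export_config, converter)
#   tflite_bytes = converter.convert()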
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Klhdy8pnk5J8"
},
"source": [
"**A tool to visualize the segmentation model inference output.**\\\n",
"This tool is used verify that the exported tflite can produce expected segmentation results.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-vGHZSPWXbyu"
},
"outputs": [],
"source": [
"MODEL='gs://**/placeholder_for_edgetpu_models/autoseg/segmentation_search_edgetpu_s_not_fused.tflite'#@param\n",
"IMAGE_HOME = 'gs://**/PS_Compare/20190711'#@param\n",
"# Relative image file names separated by comas.\n",
"TEST_IMAGES = 'ADE_val_00001626.jpg,ADE_val_00001471.jpg,ADE_val_00000557.jpg'#@param\n",
"IMAGE_WIDTH = 512 #@param\n",
"IMAGE_HEIGHT = 512 #@param"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zzhF1ASDkxTU"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"from PIL import Image as PILImage\n",
"import matplotlib.pyplot as plt\n",
"from scipy import ndimage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AXaJgLg1ml16"
},
"outputs": [],
"source": [
"# This block creates local copies of /cns and /x20 files.\n",
"TEST_IMAGES=','.join([IMAGE_HOME+'/'+image for image in TEST_IMAGES.split(',')])\n",
"\n",
"# The tflite interpreter only accepts model in local path.\n",
"def local_copy(awaypath):\n",
" localpath = '/tmp/' + awaypath.split('/')[-1]\n",
" !rm -f {localpath}\n",
" !fileutil cp -f {awaypath} {localpath}\n",
" !ls -lht {localpath}\n",
" %download_file {localpath}\n",
" return localpath\n",
"\n",
"IMAGES = [local_copy(image) for image in TEST_IMAGES.split(',')]\n",
"MODEL_COPY=local_copy(MODEL)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KhS1lOrxHp5C"
},
"outputs": [],
"source": [
"# Creates a 6px wide boolean edge mask to highlight the segmentation.\n",
"def edge(mydata):\n",
" mydata = mydata.reshape(512, 512)\n",
" mydatat = mydata.transpose([1, 0])\n",
" mydata = np.convolve(mydata.reshape(-1), [-1, 0, 1], mode='same').reshape(512, 512)\n",
" mydatat = np.convolve(mydatat.reshape(-1), [-1, 0, 1], mode='same').reshape(512, 512).transpose([1, 0])\n",
" mydata = np.maximum((mydata != 0).astype(np.int8), (mydatat != 0).astype(np.int8))\n",
" mydata = ndimage.binary_dilation(mydata).astype(np.int8)\n",
" mydata = ndimage.binary_dilation(mydata).astype(np.int8)\n",
" mydata = ndimage.binary_dilation(mydata).astype(np.int8)\n",
" return mydata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GdlsbiVqL5JZ"
},
"outputs": [],
"source": [
"def run_model(input_data):\n",
" _input_data = input_data\n",
" _input_data = (_input_data-128).astype(np.int8)\n",
" # Load the tflite model and allocate tensors.\n",
" interpreter_x = tf.lite.Interpreter(model_path=MODEL_COPY)\n",
" interpreter_x.allocate_tensors()\n",
" # Get input and output tensors.\n",
" input_details = interpreter_x.get_input_details()\n",
" output_details = interpreter_x.get_output_details()\n",
" interpreter_x.set_tensor(input_details[0]['index'], _input_data)\n",
" interpreter_x.invoke()\n",
" output_data = interpreter_x.get_tensor(output_details[0]['index'])\n",
" return output_data.reshape((512, 512, 1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1mot5M_nl5P7"
},
"outputs": [],
"source": [
"# Set visualization wind sizes.\n",
"fig, ax = plt.subplots(max(len(IMAGES),2), 3)\n",
"fig.set_figwidth(30)\n",
"fig.set_figheight(10*max(len(IMAGES),2))\n",
"\n",
"# Read and test image.\n",
"for r, image in enumerate(IMAGES):\n",
" im = PILImage.open(image).convert('RGB')\n",
" min_dim=min(im.size[0], im.size[1])\n",
" im = im.resize((IMAGE_WIDTH*im.size[0] // min_dim, IMAGE_HEIGHT*im.size[1] // min_dim))\n",
" input_data = np.expand_dims(im, axis=0)\n",
" input_data = input_data[:, :IMAGE_WIDTH,:IMAGE_HEIGHT]\n",
" ax[r, 0].imshow(input_data.reshape([512, 512, 3]).astype(np.uint8))\n",
" ax[r, 0].set_title('Original')\n",
" ax[r, 0].grid(False)\n",
"\n",
" # Test the model on random input data.\n",
" output_data = run_model(input_data)\n",
" ax[r, 1].imshow(output_data, vmin = 0, vmax = 32)\n",
" ax[r, 1].set_title('Segmentation')\n",
" ax[r, 1].grid(False)\n",
"\n",
" output_data = np.reshape(np.minimum(output_data, 32), [512,512])\n",
" output_edge = edge(output_data).reshape(512,512, 1)\n",
" output_data = np.stack([output_data%3, (output_data//3)%3, (output_data//9)%3], axis = -1)\n",
" \n",
" output_data = input_data.reshape([512, 512, 3]).astype(np.float32) * (1-output_edge) + output_data * output_edge * 255\n",
" ax[r, 2].imshow(output_data.astype(np.uint8), vmin = 0, vmax = 256)\n",
" ax[r, 2].set_title('Segmentation \u0026 original')\n",
" ax[r, 2].grid(False)\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"last_runtime": {
"build_target": "//quality/ranklab/experimental/notebook:rl_colab",
"kind": "private"
},
"name": "Inference_visualization_tool.ipynb",
"private_outputs": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}