Commit 9114f2a3 authored by A. Unique TensorFlower, committed by saberkun

Internal change

PiperOrigin-RevId: 404080616
parent ec0d7d0b
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build MobileBERT-EdgeTPU model."""
from typing import Optional
import tensorflow as tf
from official.modeling import tf_utils
from official.nlp import modeling
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import encoder as edgetpu_encoder
from official.projects.edgetpu.nlp.modeling import pretrainer as edgetpu_pretrainer
def build_bert_pretrainer(pretrainer_cfg: params.PretrainerModelParams,
encoder: Optional[tf.keras.Model] = None,
masked_lm: Optional[tf.keras.Model] = None,
                          quantization_friendly: bool = False,
name: Optional[str] = None) -> tf.keras.Model:
"""Builds pretrainer.
Args:
pretrainer_cfg: configs for the pretrainer model.
encoder: (Optional) The encoder network for the pretrainer model.
masked_lm: (Optional) The masked_lm network for the pretrainer model.
    quantization_friendly: (Optional) If enabled, the model will use the
      EdgeTPU mobilebert transformer. The difference is a customized softmax
      op that uses -120 as the mask value, which is more stable for
      post-training quantization.
name: (Optional) Name of the pretrainer model.
Returns:
The pretrainer model.
"""
encoder_cfg = pretrainer_cfg.encoder.mobilebert
encoder = encoder or edgetpu_encoder.MobileBERTEncoder(
word_vocab_size=encoder_cfg.word_vocab_size,
word_embed_size=encoder_cfg.word_embed_size,
type_vocab_size=encoder_cfg.type_vocab_size,
max_sequence_length=encoder_cfg.max_sequence_length,
num_blocks=encoder_cfg.num_blocks,
hidden_size=encoder_cfg.hidden_size,
num_attention_heads=encoder_cfg.num_attention_heads,
intermediate_size=encoder_cfg.intermediate_size,
intermediate_act_fn=encoder_cfg.hidden_activation,
hidden_dropout_prob=encoder_cfg.hidden_dropout_prob,
attention_probs_dropout_prob=encoder_cfg.attention_probs_dropout_prob,
intra_bottleneck_size=encoder_cfg.intra_bottleneck_size,
initializer_range=encoder_cfg.initializer_range,
use_bottleneck_attention=encoder_cfg.use_bottleneck_attention,
key_query_shared_bottleneck=encoder_cfg.key_query_shared_bottleneck,
num_feedforward_networks=encoder_cfg.num_feedforward_networks,
normalization_type=encoder_cfg.normalization_type,
classifier_activation=encoder_cfg.classifier_activation,
input_mask_dtype=encoder_cfg.input_mask_dtype,
quantization_friendly=quantization_friendly)
if pretrainer_cfg.cls_heads:
cls_heads = [
modeling.layers.ClassificationHead(**cfg.as_dict())
for cfg in pretrainer_cfg.cls_heads
]
else:
cls_heads = []
# Get the embedding table from the encoder model.
def _get_embedding_table(encoder):
for layer in encoder.layers:
if layer.name.startswith('mobile_bert_embedding'):
return layer.word_embedding.embeddings
    raise ValueError('Cannot find the embedding layer in the encoder.')
masked_lm = masked_lm or modeling.layers.MobileBertMaskedLM(
embedding_table=_get_embedding_table(encoder),
activation=tf_utils.get_activation(pretrainer_cfg.mlm_activation),
initializer=tf.keras.initializers.TruncatedNormal(
stddev=pretrainer_cfg.mlm_initializer_range),
name='cls/predictions')
pretrainer = edgetpu_pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=encoder,
classification_heads=cls_heads,
customized_masked_lm=masked_lm,
name=name)
return pretrainer
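
# A minimal usage sketch (mirrors the unit test below; assumes the default
# MobileBERT encoder config from `official.nlp.configs.encoders`):
#
#   from official.nlp.configs import encoders
#   cfg = params.PretrainerModelParams(
#       encoder=encoders.EncoderConfig(type='mobilebert'))
#   pretrainer = build_bert_pretrainer(cfg, quantization_friendly=True)
#   _ = pretrainer(pretrainer.inputs)  # Forces variable creation.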
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilebert_edgetpu.model_builder.py."""
import tensorflow as tf
from official.nlp import modeling
from official.nlp.configs import encoders
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
class ModelBuilderTest(tf.test.TestCase):
def setUp(self):
super(ModelBuilderTest, self).setUp()
self.pretrainer_config = params.PretrainerModelParams(
encoder=encoders.EncoderConfig(type='mobilebert'))
def test_default_initialization(self):
"""Initializes pretrainer model from stratch."""
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=self.pretrainer_config,
name='test_model')
# Makes sure the pretrainer variables are created.
_ = pretrainer(pretrainer.inputs)
self.assertEqual(pretrainer.name, 'test_model')
encoder = pretrainer.encoder_network
default_number_layer = encoders.MobileBertEncoderConfig().num_blocks
encoder_transformer_layer_counter = 0
for layer in encoder.layers:
if isinstance(layer, modeling.layers.MobileBertTransformer):
encoder_transformer_layer_counter += 1
self.assertEqual(default_number_layer, encoder_transformer_layer_counter)
def test_initialization_with_encoder(self):
"""Initializes pretrainer model with an existing encoder network."""
encoder = encoders.build_encoder(
config=encoders.EncoderConfig(type='mobilebert'))
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=self.pretrainer_config,
encoder=encoder)
encoder_network = pretrainer.encoder_network
self.assertEqual(encoder_network, encoder)
def test_initialization_with_mlm(self):
"""Initializes pretrainer model with an existing MLM head."""
embedding = modeling.layers.MobileBertEmbedding(
word_vocab_size=30522,
word_embed_size=128,
type_vocab_size=2,
output_embed_size=encoders.MobileBertEncoderConfig().hidden_size)
dummy_input = tf.keras.layers.Input(
shape=(None,), dtype=tf.int32)
_ = embedding(dummy_input)
embedding_table = embedding.word_embedding.embeddings
mlm_layer = modeling.layers.MobileBertMaskedLM(
embedding_table=embedding_table)
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=self.pretrainer_config,
masked_lm=mlm_layer)
mlm_network = pretrainer.masked_lm
self.assertEqual(mlm_network, mlm_layer)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BERT Pre-training model."""
# pylint: disable=g-classes-have-attributes
import copy
from typing import List, Optional
import tensorflow as tf
from official.nlp.modeling import layers
@tf.keras.utils.register_keras_serializable(package='Text')
class MobileBERTEdgeTPUPretrainer(tf.keras.Model):
"""BERT pretraining model V2.
  Adds the masked language model head and optional classification heads on
  top of the transformer encoder.
Args:
encoder_network: A transformer network. This network should output a
sequence output and a classification output.
mlm_activation: The activation (if any) to use in the masked LM network. If
None, no activation will be used.
mlm_initializer: The initializer (if any) to use in the masked LM. Default
to a Glorot uniform initializer.
classification_heads: A list of optional head layers to transform on encoder
sequence outputs.
customized_masked_lm: A customized masked_lm layer. If None, will create
a standard layer from `layers.MaskedLM`; if not None, will use the
specified masked_lm layer. Above arguments `mlm_activation` and
`mlm_initializer` will be ignored.
name: The name of the model.
Inputs: Inputs defined by the encoder network, plus `masked_lm_positions` as a
dictionary.
  Outputs: A dictionary of `mlm_logits`, classification head outputs keyed by
    head names, and also outputs from `encoder_network`, keyed by
    `pooled_output`, `sequence_output` and `encoder_outputs` (if any).
"""
def __init__(
self,
encoder_network: tf.keras.Model,
mlm_activation=None,
mlm_initializer='glorot_uniform',
classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
customized_masked_lm: Optional[tf.keras.layers.Layer] = None,
name: str = 'bert',
**kwargs):
inputs = copy.copy(encoder_network.inputs)
outputs = {}
encoder_network_outputs = encoder_network(inputs)
if isinstance(encoder_network_outputs, list):
outputs['pooled_output'] = encoder_network_outputs[1]
if isinstance(encoder_network_outputs[0], list):
outputs['encoder_outputs'] = encoder_network_outputs[0]
outputs['sequence_output'] = encoder_network_outputs[0][-1]
else:
outputs['sequence_output'] = encoder_network_outputs[0]
elif isinstance(encoder_network_outputs, dict):
outputs = encoder_network_outputs
else:
raise ValueError('encoder_network\'s output should be either a list '
'or a dict, but got %s' % encoder_network_outputs)
masked_lm_positions = tf.keras.layers.Input(
shape=(None,), name='masked_lm_positions', dtype=tf.int32)
inputs.append(masked_lm_positions)
masked_lm_layer = customized_masked_lm or layers.MaskedLM(
embedding_table=encoder_network.get_embedding_table(),
activation=mlm_activation,
initializer=mlm_initializer,
name='cls/predictions')
sequence_output = outputs['sequence_output']
outputs['mlm_logits'] = masked_lm_layer(
sequence_output, masked_positions=masked_lm_positions)
classification_head_layers = classification_heads or []
for cls_head in classification_head_layers:
cls_outputs = cls_head(sequence_output)
if isinstance(cls_outputs, dict):
outputs.update(cls_outputs)
else:
outputs[cls_head.name] = cls_outputs
super(MobileBERTEdgeTPUPretrainer, self).__init__(
inputs=inputs,
outputs=outputs,
name=name,
**kwargs)
self._config = {
'encoder_network': encoder_network,
'mlm_activation': mlm_activation,
'mlm_initializer': mlm_initializer,
'classification_heads': classification_heads,
'customized_masked_lm': customized_masked_lm,
'name': name,
}
self.encoder_network = encoder_network
self.masked_lm = masked_lm_layer
self.classification_heads = classification_head_layers
@property
def checkpoint_items(self):
"""Returns a dictionary of items to be additionally checkpointed."""
items = dict(encoder=self.encoder_network, masked_lm=self.masked_lm)
for head in self.classification_heads:
for key, item in head.checkpoint_items.items():
items['.'.join([head.name, key])] = item
return items
def get_config(self):
return self._config
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
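
# A minimal usage sketch (mirrors the unit tests below; any encoder with
# compatible list/dict outputs can stand in for `networks.BertEncoder`):
#
#   from official.nlp.modeling import networks
#   encoder = networks.BertEncoder(vocab_size=100, num_layers=2)
#   model = MobileBERTEdgeTPUPretrainer(encoder_network=encoder)
#   # Inputs are the encoder's inputs plus a `masked_lm_positions` tensor.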
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for BERT pretrainer model."""
import itertools
from absl.testing import parameterized
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.projects.edgetpu.nlp.modeling import pretrainer
class MobileBERTEdgeTPUPretrainerTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(itertools.product([True, False],
[True, False],
[True, False]))
def test_mobilebert_edgetpu_pretrainer(
self,
dict_outputs,
return_all_encoder_outputs,
use_customized_masked_lm):
"""Validate that the Keras object can be created."""
# Build a transformer network to use within the BERT trainer.
vocab_size = 100
sequence_length = 512
hidden_size = 48
num_layers = 2
test_network = networks.BertEncoder(
vocab_size=vocab_size,
num_layers=num_layers,
hidden_size=hidden_size,
max_sequence_length=sequence_length,
return_all_encoder_outputs=return_all_encoder_outputs,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
if use_customized_masked_lm:
customized_masked_lm = layers.MaskedLM(
embedding_table=test_network.get_embedding_table())
else:
customized_masked_lm = None
bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=test_network, customized_masked_lm=customized_masked_lm)
num_token_predictions = 20
# Create a set of 2-dimensional inputs (the first dimension is implicit).
inputs = dict(
input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32))
inputs['masked_lm_positions'] = tf.keras.Input(
shape=(num_token_predictions,), dtype=tf.int32)
# Invoke the trainer model on the inputs. This causes the layer to be built.
outputs = bert_trainer_model(inputs)
has_encoder_outputs = dict_outputs or return_all_encoder_outputs
expected_keys = ['sequence_output', 'pooled_output']
if has_encoder_outputs:
expected_keys.append('encoder_outputs')
expected_keys.append('mlm_logits')
self.assertSameElements(outputs.keys(), expected_keys)
# Validate that the outputs are of the expected shape.
expected_lm_shape = [None, num_token_predictions, vocab_size]
self.assertAllEqual(expected_lm_shape,
outputs['mlm_logits'].shape.as_list())
expected_sequence_output_shape = [None, sequence_length, hidden_size]
self.assertAllEqual(expected_sequence_output_shape,
outputs['sequence_output'].shape.as_list())
expected_pooled_output_shape = [None, hidden_size]
self.assertAllEqual(expected_pooled_output_shape,
outputs['pooled_output'].shape.as_list())
def test_multiple_cls_outputs(self):
"""Validate that the Keras object can be created."""
# Build a transformer network to use within the BERT trainer.
vocab_size = 100
sequence_length = 512
hidden_size = 48
num_layers = 2
test_network = networks.BertEncoder(
vocab_size=vocab_size,
num_layers=num_layers,
hidden_size=hidden_size,
max_sequence_length=sequence_length,
dict_outputs=True)
bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=test_network,
classification_heads=[layers.MultiClsHeads(
inner_dim=5, cls_list=[('foo', 2), ('bar', 3)])])
num_token_predictions = 20
# Create a set of 2-dimensional inputs (the first dimension is implicit).
inputs = dict(
input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
masked_lm_positions=tf.keras.Input(
shape=(num_token_predictions,), dtype=tf.int32))
# Invoke the trainer model on the inputs. This causes the layer to be built.
outputs = bert_trainer_model(inputs)
self.assertEqual(outputs['foo'].shape.as_list(), [None, 2])
self.assertEqual(outputs['bar'].shape.as_list(), [None, 3])
def test_v2_serialize_deserialize(self):
"""Validate that the BERT trainer can be serialized and deserialized."""
# Build a transformer network to use within the BERT trainer. (Here, we use
# a short sequence_length for convenience.)
test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
# Create a BERT trainer with the created network. (Note that all the args
# are different, so we can catch any serialization mismatches.)
bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=test_network)
# Create another BERT trainer via serialization and deserialization.
config = bert_trainer_model.get_config()
new_bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer.from_config(
config)
# Validate that the config can be forced to JSON.
_ = new_bert_trainer_model.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(bert_trainer_model.get_config(),
new_bert_trainer_model.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MobileBERT-EdgeTPU model runner."""
import os
from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf
from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.projects.edgetpu.nlp import mobilebert_edgetpu_trainer
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.utils import utils
FLAGS = flags.FLAGS
def main(_):
# Set up experiment params and load the configs from file/files.
experiment_params = params.EdgeTPUBERTCustomParams()
experiment_params = utils.config_override(experiment_params, FLAGS)
model_dir = utils.get_model_dir(experiment_params, FLAGS)
distribution_strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=experiment_params.runtime.distribution_strategy,
all_reduce_alg=experiment_params.runtime.all_reduce_alg,
num_gpus=experiment_params.runtime.num_gpus,
tpu_address=experiment_params.runtime.tpu_address)
with distribution_strategy.scope():
teacher_model = model_builder.build_bert_pretrainer(
pretrainer_cfg=experiment_params.teacher_model,
quantization_friendly=False,
name='teacher')
student_model = model_builder.build_bert_pretrainer(
pretrainer_cfg=experiment_params.student_model,
quantization_friendly=True,
name='student')
# Load model weights.
teacher_ckpt_dir_or_file = experiment_params.teacher_model_init_checkpoint
if not teacher_ckpt_dir_or_file:
raise ValueError('`teacher_model_init_checkpoint` is not specified.')
utils.load_checkpoint(teacher_model, teacher_ckpt_dir_or_file)
student_ckpt_dir_or_file = experiment_params.student_model_init_checkpoint
if not student_ckpt_dir_or_file:
# Makes sure the pretrainer variables are created.
_ = student_model(student_model.inputs)
    logging.warning('No student checkpoint is provided, training might take '
                    'much longer before converging.')
else:
utils.load_checkpoint(student_model, student_ckpt_dir_or_file)
runner = mobilebert_edgetpu_trainer.MobileBERTEdgeTPUDistillationTrainer(
teacher_model=teacher_model,
student_model=student_model,
strategy=distribution_strategy,
experiment_params=experiment_params,
export_ckpt_path=model_dir)
# Save checkpoint for preemption handling.
# Checkpoint for downstreaming tasks are saved separately inside the
# runner's train_loop_end() function.
checkpoint = tf.train.Checkpoint(
teacher_model=runner.teacher_model,
student_model=runner.student_model,
layer_wise_optimizer=runner.layer_wise_optimizer,
e2e_optimizer=runner.e2e_optimizer,
current_step=runner.current_step)
checkpoint_manager = tf.train.CheckpointManager(
checkpoint,
directory=model_dir,
max_to_keep=5,
step_counter=runner.current_step,
checkpoint_interval=20000,
init_fn=None)
controller = orbit.Controller(
trainer=runner,
evaluator=runner,
global_step=runner.current_step,
strategy=distribution_strategy,
steps_per_loop=experiment_params.orbit_config.steps_per_loop,
summary_dir=os.path.join(model_dir, 'train'),
eval_summary_dir=os.path.join(model_dir, 'eval'),
checkpoint_manager=checkpoint_manager)
if FLAGS.mode == 'train':
controller.train(steps=experiment_params.orbit_config.total_steps)
else:
    raise ValueError('Unsupported mode, only `train` is supported.')
if __name__ == '__main__':
tfm_flags.define_flags()
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
r"""Export tflite for MobileBERT-EdgeTPU with SQUAD head.
Example usage:
python3 export_tflite_squad.py \
--config_file=third_party/tensorflow_models/official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_xs.yaml \
--export_path=/tmp/ \
--quantization_method=full-integer
"""
# pylint: enable=line-too-long
import os
import tempfile
from typing import Sequence
from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf
from official.common import flags as tfm_flags
from official.nlp.data import data_loader_factory
from official.nlp.data import question_answering_dataloader
from official.nlp.modeling import models
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.utils import utils
FLAGS = flags.FLAGS
SQUAD_TRAIN_SPLIT = 'gs://**/tp/bert/squad_v1.1/train.tf_record'
flags.DEFINE_string('export_path', '/tmp/',
'File path to store tflite model.')
flags.DEFINE_enum('quantization_method', 'float',
['full-integer', 'hybrid', 'float'], 'Quantization method.')
flags.DEFINE_integer('batch_size', 1,
'Fixed batch size for exported TFLite model.')
flags.DEFINE_integer('sequence_length', 384,
'Fixed sequence length.')
flags.DEFINE_string('model_checkpoint', None,
                    'Checkpoint path for the model. Model will be initialized '
                    'with random weights if path is None.')
def build_model_for_serving(model: tf.keras.Model,
sequence_length: int = 384,
batch_size: int = 1) -> tf.keras.Model:
"""Builds MLPerf evaluation compatible models.
To run the model on device, the model input/output datatype and node names
need to match the MLPerf setup.
Args:
model: Input keras model.
sequence_length: BERT model sequence length.
batch_size: Inference batch size.
Returns:
Keras model with new input/output nodes.
"""
word_ids = tf.keras.Input(shape=(sequence_length,),
batch_size=batch_size,
dtype=tf.int32,
name='input_word_ids')
mask = tf.keras.Input(shape=(sequence_length,),
batch_size=batch_size,
dtype=tf.int32, name='input_mask')
type_ids = tf.keras.Input(shape=(sequence_length,),
batch_size=batch_size,
dtype=tf.int32, name='input_type_ids')
model_output = model([word_ids, type_ids, mask])
# Use identity layers wrapped in lambdas to explicitly name the output
# tensors.
start_logits = tf.keras.layers.Lambda(
tf.identity, name='start_positions')(
model_output[0])
end_logits = tf.keras.layers.Lambda(
tf.identity, name='end_positions')(
model_output[1])
model = tf.keras.Model(
inputs=[word_ids, type_ids, mask],
outputs=[start_logits, end_logits])
return model
def build_inputs(data_params, input_context=None):
"""Returns tf.data.Dataset for sentence_prediction task."""
return data_loader_factory.get_data_loader(data_params).load(input_context)
def main(argv: Sequence[str]) -> None:
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
# Set up experiment params and load the configs from file/files.
experiment_params = params.EdgeTPUBERTCustomParams()
experiment_params = utils.config_override(experiment_params, FLAGS)
  # Change the input mask type to tf.float32 to avoid an additional casting op.
  experiment_params.student_model.encoder.mobilebert.input_mask_dtype = 'float32'
  # Experiments indicate that using -120 as the softmax mask value is good
  # enough for both int8 and bfloat16, so we set quantization_friendly to True
  # for both the quantized and the float model.
pretrainer_model = model_builder.build_bert_pretrainer(
experiment_params.student_model,
name='pretrainer',
quantization_friendly=True)
encoder_network = pretrainer_model.encoder_network
model = models.BertSpanLabeler(
network=encoder_network,
initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))
# Load model weights.
if FLAGS.model_checkpoint is not None:
checkpoint_dict = {'model': model}
checkpoint = tf.train.Checkpoint(**checkpoint_dict)
checkpoint.restore(FLAGS.model_checkpoint).assert_existing_objects_matched()
model_for_serving = build_model_for_serving(model)
model_for_serving.summary()
# TODO(b/194449109): Need to save the model to file and then convert tflite
# with 'tf.lite.TFLiteConverter.from_saved_model()' to get the expected
# accuracy
  # Use mkdtemp() so the temporary directory is not deleted while still in use.
  tmp_dir = tempfile.mkdtemp()
model_for_serving.save(tmp_dir)
def _representative_dataset():
dataset_params = question_answering_dataloader.QADataConfig()
dataset_params.input_path = SQUAD_TRAIN_SPLIT
dataset_params.drop_remainder = False
dataset_params.global_batch_size = 1
dataset_params.is_training = True
dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
build_inputs, dataset_params)
for example in dataset.take(100):
inputs = example[0]
input_word_ids = inputs['input_word_ids']
input_mask = inputs['input_mask']
input_type_ids = inputs['input_type_ids']
yield [input_word_ids, input_mask, input_type_ids]
converter = tf.lite.TFLiteConverter.from_saved_model(tmp_dir)
if FLAGS.quantization_method in ['full-integer', 'hybrid']:
converter.optimizations = [tf.lite.Optimize.DEFAULT]
if FLAGS.quantization_method in ['full-integer']:
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.float32
converter.representative_dataset = _representative_dataset
tflite_quant_model = converter.convert()
export_model_path = os.path.join(FLAGS.export_path, 'model.tflite')
with tf.io.gfile.GFile(export_model_path, 'wb') as f:
f.write(tflite_quant_model)
  logging.info('Successfully saved the tflite model to %s', FLAGS.export_path)
if __name__ == '__main__':
tfm_flags.define_flags()
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for export_tflite_squad."""
import tensorflow as tf
from official.nlp.modeling import models
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.serving import export_tflite_squad
class ExportTfliteSquadTest(tf.test.TestCase):
def setUp(self):
super(ExportTfliteSquadTest, self).setUp()
experiment_params = params.EdgeTPUBERTCustomParams()
pretrainer_model = model_builder.build_bert_pretrainer(
experiment_params.student_model, name='pretrainer')
encoder_network = pretrainer_model.encoder_network
self.span_labeler = models.BertSpanLabeler(
network=encoder_network,
initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))
def test_model_input_output(self):
test_model = export_tflite_squad.build_model_for_serving(self.span_labeler)
# Test model input order, names, and shape.
self.assertEqual(test_model.input[0].name, 'input_word_ids')
self.assertEqual(test_model.input[1].name, 'input_type_ids')
self.assertEqual(test_model.input[2].name, 'input_mask')
self.assertEqual(test_model.input[0].shape, (1, 384))
self.assertEqual(test_model.input[1].shape, (1, 384))
self.assertEqual(test_model.input[2].shape, (1, 384))
# Test model output order, name, and shape.
self.assertEqual(test_model.output[0].name, 'start_positions/Identity:0')
self.assertEqual(test_model.output[1].name, 'end_positions/Identity:0')
self.assertEqual(test_model.output[0].shape, (1, 384))
self.assertEqual(test_model.output[1].shape, (1, 384))
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions."""
import os
import pprint
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.projects.edgetpu.nlp.configs import params
def serialize_config(experiment_params: params.EdgeTPUBERTCustomParams,
model_dir: str):
"""Serializes and saves the experiment config."""
params_save_path = os.path.join(model_dir, 'params.yaml')
logging.info('Saving experiment configuration to %s', params_save_path)
tf.io.gfile.makedirs(model_dir)
hyperparams.save_params_dict_to_yaml(experiment_params, params_save_path)
# Note: Do not call this utility function unless you load the `flags`
# module in your script.
def config_override(experiment_params, flags_obj):
"""Overrides ExperimentConfig according to flags."""
if not hasattr(flags_obj, 'tpu'):
raise ModuleNotFoundError(
'`tpu` is not found in FLAGS. Need to load flags.py first.')
# Change runtime.tpu to the real tpu.
experiment_params.override({
'runtime': {
'tpu_address': flags_obj.tpu,
}
})
# Get the first level of override from `--config_file`.
# `--config_file` is typically used as a template that specifies the common
# override for a particular experiment.
for config_file in flags_obj.config_file or []:
experiment_params = hyperparams.override_params_dict(
experiment_params, config_file, is_strict=True)
# Get the second level of override from `--params_override`.
# `--params_override` is typically used as a further override over the
# template. For example, one may define a particular template for training
# ResNet50 on ImageNet in a config file and pass it via `--config_file`,
# then define different learning rates and pass it via `--params_override`.
if flags_obj.params_override:
experiment_params = hyperparams.override_params_dict(
experiment_params, flags_obj.params_override, is_strict=True)
experiment_params.validate()
experiment_params.lock()
pp = pprint.PrettyPrinter()
logging.info('Final experiment parameters: %s',
pp.pformat(experiment_params.as_dict()))
model_dir = get_model_dir(experiment_params, flags_obj)
if flags_obj.mode is not None:
if 'train' in flags_obj.mode:
# Pure eval modes do not output yaml files. Otherwise continuous eval job
# may race against the train job for writing the same file.
serialize_config(experiment_params, model_dir)
return experiment_params
def get_model_dir(experiment_params, flags_obj):
"""Gets model dir from Flags."""
del experiment_params
return flags_obj.model_dir
def load_checkpoint(model: tf.keras.Model, ckpt_path: str):
"""Initializes model with the checkpoint."""
ckpt_dir_or_file = ckpt_path
if tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
# Makes sure the pretrainer variables are created.
_ = model(model.inputs)
checkpoint = tf.train.Checkpoint(
**model.checkpoint_items)
checkpoint.read(ckpt_dir_or_file).expect_partial()
  logging.info('Successfully loaded parameters for %s model', model.name)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for utils.py."""
from absl import flags
import tensorflow as tf
import yaml
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.utils import utils
FLAGS = flags.FLAGS
# Helper function to compare two nested dicts.
# Note that this function only ensures that every field in dict_a is defined
# in dict_b with the same value. It does not guarantee that
# dict_a == dict_b.
def nested_dict_compare(dict_a, dict_b):
for k, v in sorted(dict_a.items()):
if k not in dict_b:
return False
if isinstance(v, dict) and isinstance(dict_b[k], dict):
if not nested_dict_compare(dict_a[k], dict_b[k]):
return False
else:
# A caveat: When dict_a[k] = 1, dict_b[k] = True, the return is True.
if dict_a[k] != dict_b[k]:
return False
return True
class UtilsTest(tf.test.TestCase):
def test_config_override(self):
    # Define several dummy flags which are called by the utils.config_override
    # function.
file_path = 'third_party/tensorflow_models/official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_m.yaml'
flags.DEFINE_string('tpu', None, 'tpu_address.')
flags.DEFINE_list('config_file', [file_path],
'A list of config files path.')
flags.DEFINE_string('params_override', None, 'Override params.')
flags.DEFINE_string('model_dir', '/tmp/', 'Model saving directory.')
flags.DEFINE_list('mode', ['train'], 'Job mode.')
flags.DEFINE_bool('use_vizier', False,
'Whether to enable vizier based hyperparameter search.')
experiment_params = params.EdgeTPUBERTCustomParams()
experiment_params = utils.config_override(experiment_params, FLAGS)
experiment_params_dict = experiment_params.as_dict()
with tf.io.gfile.GFile(file_path, 'r') as f:
loaded_dict = yaml.load(f, Loader=yaml.FullLoader)
    # experiment_params contains all the configs, but loaded_dict might
    # contain only a subset of them.
self.assertTrue(nested_dict_compare(loaded_dict, experiment_params_dict))
def test_load_checkpoint(self):
"""Test the pretrained model can be successfully loaded."""
experiment_params = params.EdgeTPUBERTCustomParams()
student_pretrainer = experiment_params.student_model
student_pretrainer.encoder.type = 'mobilebert'
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=student_pretrainer,
name='test_model')
# Makes sure the pretrainer variables are created.
checkpoint_path = self.create_tempfile().full_path
_ = pretrainer(pretrainer.inputs)
pretrainer.save_weights(checkpoint_path)
utils.load_checkpoint(pretrainer, checkpoint_path)
if __name__ == '__main__':
tf.test.main()
# EdgeTPU-optimized Vision Models
## Overview
This project includes computer vision models optimized for the Edge TPU
featured in Pixel phones, Coral products, and more. These models significantly
improve the latency- and energy-vs-accuracy pareto-frontier compared to
existing SOTA models when running on Edge TPU devices.
## MobileNet-EdgeTPU Classification Models
### Introduction
We are presenting a family of computer vision models based on MobileNetEdgeTPUV2
that are optimized for the next generation Edge TPU ML accelerator in the Google
Tensor SoC that powers the Pixel 6 phones. These models improve the
latency-accuracy pareto-frontier compared to the existing SOTA on-device models
including their predecessor MobileNetEdgeTPUs. MobileNetEdgeTPUV2 can be used as
a standalone image classification model or as a backbone for other computer
vision tasks such as object detection or semantic segmentation.
### Search space design
During the design of MobileNetEdgeTPUV2 we crafted a neural network search
space that includes building blocks which run efficiently on the Edge TPU
accelerator while providing better algorithmic qualities, and we leveraged
AutoML to find the optimal architectures. As one of the key optimizations, we
introduce Group Convolution based Inverted Bottleneck (IBN) blocks, which
provide great flexibility in trading off latency against accuracy.
The Inverted Bottleneck (IBN) is a widely used building block for mobile
vision networks. A conventional IBN uses pointwise convolutions for
expansion/projection before/after a depthwise convolution. It has previously
been shown that replacing the pointwise expansion and the depthwise convolution
with a single full convolution can provide more trainable parameters while
being faster. However, one big limitation is that such full-convolution IBNs
can get very expensive in terms of latency and memory requirements, especially
for the narrow/deep tensors seen in the later stages of vision models. This
limits the use of “fused” full-convolution IBNs throughout the model and leaves
the depthwise IBN as the only alternative.
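To make the contrast concrete, here is a minimal Keras sketch of the two
variants just described (an illustrative sketch only: the helper names, layer
parameters, and the omission of batch normalization are our simplifications,
not the released model code):

```
import tensorflow as tf

def conventional_ibn(x, out_filters, expand_ratio=4, stride=1):
  """(a) Depthwise IBN: pointwise expand -> depthwise -> pointwise project."""
  expanded = int(x.shape[-1]) * expand_ratio
  x = tf.keras.layers.Conv2D(expanded, 1, padding='same', activation='relu')(x)
  x = tf.keras.layers.DepthwiseConv2D(3, strides=stride, padding='same',
                                      activation='relu')(x)
  return tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)

def fused_ibn(x, out_filters, expand_ratio=4, stride=1):
  """(b) Fused IBN: one full 3x3 conv replaces the expand/depthwise pair."""
  expanded = int(x.shape[-1]) * expand_ratio
  x = tf.keras.layers.Conv2D(expanded, 3, strides=stride, padding='same',
                             activation='relu')(x)
  return tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)
```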
<figure align="center">
<img width=70% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-ibn-intro.png>
<figcaption>Inverted bottleneck block (IBN) variants: (a) Conventional with depthwise, (b) Fused-IBN, (c) GC-IBN with group convolutions in the expansion phase</figcaption>
</figure>
In this work we utilize Group Convolution (GC) as part of the fused expansion
when constructing IBNs (Figure 1). The GC-based IBN becomes a versatile block
that opens up a large design space between conventional depthwise IBNs and
fused full-convolution IBNs, controlled by the group size parameter. Figure 2
demonstrates the search space enabled by GC-based IBNs, which allows a flexible
tradeoff between latency and the number of trainable parameters. GC-based IBNs
allow increasing the number of trainable parameters gradually without paying
the latency cost of full-convolution based IBNs. Moreover, they can also be
faster than conventional IBNs with depthwise convolutions while providing more
trainable parameters.
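A matching sketch of the GC-IBN variant, using the `groups` argument of
`tf.keras.layers.Conv2D` for the grouped expansion (again illustrative; the
group count shown is an assumption, and the channel counts must divide evenly
by it):

```
def gc_ibn(x, out_filters, expand_ratio=4, groups=4, stride=1):
  """(c) GC-IBN: grouped 3x3 conv in the expansion, pointwise projection."""
  expanded = int(x.shape[-1]) * expand_ratio  # Must be divisible by `groups`.
  x = tf.keras.layers.Conv2D(expanded, 3, strides=stride, padding='same',
                             groups=groups, activation='relu')(x)
  return tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)
```

With `groups=1` this reduces to the fused IBN, while larger group counts move
the block toward the cheaper, depthwise end of the spectrum.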
<figure align="center">
<img width=60% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-gc-comparison.png>
</figure>
### Model performance on Edge TPU
The tradeoffs discussed above and exemplified in Figure 2 are highly dependent
on the tensor shapes and cannot be generalized across the whole network. Hence,
we rely on AutoML techniques to find the optimal block decisions and craft a
family of network architectures at different latency targets. Figure 3
demonstrates that the resulting MobilenetEdgeTPUV2 model family improves the
pareto-frontier compared to the existing on-device SOTA models when run on the
Edge TPU.
<figure align="center">
<img width=70% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-edgetpu-classification-plot.png>
<figcaption>Comparison of Imagenet top-1 accuracy and Pixel 6 Edge TPU latency of MobilenetEdgeTPUV2 models with other on-device classification models</figcaption>
</figure>
#### On-device benchmarking of classification models
Results of on-device benchmarking of various int8-quantized image
classification models at 224x224 input resolution:
Model | Accuracy (int8) | Pixel 6 Edge TPU Latency (ms)
----------------------- | :-------------- | :----------------------------
MobileNetEdgeTPUv2-Tiny | 74.70% | 0.78
MobileNetEdgeTPUv2-XS | 75.78% | 0.82
MobileNetEdgeTPUv2-S | 77.36% | 1.03
MobileNetEdgeTPUv2-M | 78.34% | 1.35
MobileNetEdgeTPUv2-L | 78.97% | 1.64
MobileNetEdgeTPU dm0.75 | 73.5% | 0.79
MobileNetEdgeTPU dm1.0 | 75.6% | 0.92
MobileNetEdgeTPU dm1.25 | 77.06% | 1.2
MobileNetEdgeTPU dm1.5 | 75.9% | 1.42
MobileNetEdgeTPU dm1.75 | 78.6% | 1.93
### Model performance on Pixel 6 CPU
Our primary optimization target is the Edge TPU accelerator however in our
search space we include operations that also run well on Pixel 6 CPU to be able
to reach a wide range of platforms. Moreover, we implement GC using functionally
equivalent series of commonly used ML primitives (channelwise slice, full
convolution, concatenation) as shown in Figure 2, since a native GC operation
may not be supported for all target platforms. As a result, the performance of
MobilenetEdgeTPUV2 is also superior to other on-device models when run on Pixel
6 CPU as shown in Figure 4.
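A sketch of that functionally equivalent decomposition (our own minimal
version, assuming the input channel count divides evenly by the group count):

```
def grouped_conv_via_primitives(x, filters, groups, kernel_size=3):
  """Grouped conv as channelwise slice -> per-group full conv -> concat."""
  slices = tf.split(x, num_or_size_splits=groups, axis=-1)
  outputs = [
      tf.keras.layers.Conv2D(filters // groups, kernel_size, padding='same')(s)
      for s in slices
  ]
  return tf.concat(outputs, axis=-1)
```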
<figure align="center">
<img width=70% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-cpu-classification-plot.png>
<figcaption>Comparison of Imagenet top-1 accuracy and Pixel 6 latency of MobilenetEdgeTPUV2 models with other on-device classification models</figcaption>
</figure>
## Semantic segmentation task
### Using classification models as backbone
We also present segmentation models based on the MobilenetEdgeTPUV2 backbone
with the DeepLab v3 plus decoder and head (first used
[here](https://arxiv.org/pdf/1802.02611.pdf)). These models are optimized for
the next generation Edge TPU accelerators featured in Pixel 6 phones and
improve the latency-accuracy pareto-frontier compared to their predecessors
based on MobileNetV2 and DeepLabV3+.
#### Segmentation model design
The segmentation model is built using the pretrained MobilenetEdgeTPUV2 as a
feature encoder and an ASPP decoder in conjunction with a DeepLab V3 Plus head.
Separable convolutions are used to reduce the size of the model.
<figure align="center">
<img width=60% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-seg-flow.png>
<figcaption></figcaption>
</figure>
The last two layers of the model (bilinear resizing and argmax) contribute
significantly to the on-device latency. This is due to the large activation
size between these layers (512 x 512 x number of classes). These layers can be
merged without significantly impacting quality scores by applying argmax at a
smaller resolution and scaling the resulting class map to the desired size
with nearest-neighbor upsampling.
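A minimal sketch of this fusion (a hypothetical, simplified version of the
`resize256,argmax,resize512,squeeze` export option described later; the sizes
are illustrative):

```
def fused_argmax(logits, argmax_size=256, output_size=512):
  """Argmax at a reduced resolution, then nearest-neighbor upsampling."""
  small = tf.image.resize(logits, [argmax_size, argmax_size])
  classes = tf.argmax(small, axis=-1, output_type=tf.int32)
  # Nearest-neighbor resizing preserves the integer class ids.
  upsampled = tf.image.resize(classes[..., tf.newaxis],
                              [output_size, output_size], method='nearest')
  return tf.squeeze(upsampled, axis=-1)
```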
<figure align="center">
<img width=60% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-seg-fused-argmax.png>
</figure>
### On-device benchmarking of segmentation models
<figure align="center">
<img src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-seg-plot.png width=60%>
<figcaption>Performance of AutosegEdgeTPU and MobilenetEdgeTPUV2+DeeplabV3+ models on the 32-class ADE20K semantic segmentation task.</figcaption>
</figure>
| Backbone                | Segmentation Head | #Parameters (million) | ADE20K 32-class mIOU | Pixel 6 Edge TPU latency (ms) |
| ----------------------- | ----------------- | --------------------- | -------------------- | ----------------------------- |
| MobileNet V2 (baseline) | DeeplabV3+        | 2.34                  | 54.06%               | 7.5                           |
| MobilenetEdgeTPUV2-XS   | DeeplabV3+        | 3.6                   | 56.02%               | 5.2                           |
| MobilenetEdgeTPUV2-S    | DeeplabV3+        | 5.2                   | 59.43%               | 5.9                           |
| MobilenetEdgeTPUV2-M    | DeeplabV3+        | 7.7                   | 59.81%               | 7.2                           |
| AutosegEdgeTPU-XS       | BiFPN             | 2.9                   | 59.64%               | 5.4                           |
| AutosegEdgeTPU-S        | BiFPN             | 3.1                   | 61.31%               | 5.7                           |
By fusing argmax with the resize operator as shown above, it is possible to
further improve the on-device latency of the segmentation models without
significantly impacting the quality:
| Backbone              | Segmentation Head | #Parameters (million) | ADE20K 32-class mIOU | Pixel 6 Edge TPU latency (ms) |
| --------------------- | ----------------- | --------------------- | -------------------- | ----------------------------- |
| MobilenetEdgeTPUV2-XS | DeeplabV3+        | 3.6                   | 56%                  | 3.4                           |
| MobilenetEdgeTPUV2-S  | DeeplabV3+        | 5.2                   | 59.41%               | 4.2                           |
| MobilenetEdgeTPUV2-M  | DeeplabV3+        | 7.7                   | 59.79%               | 5.5                           |
| AutosegEdgeTPU-XS     | BiFPN             | 2.9                   | 59.62%               | 3.6                           |
| AutosegEdgeTPU-S      | BiFPN             | 3.1                   | 61.28%               | 3.9                           |
### Training the models
Note that `EXPERIMENT_TYPE` has to be one of the preregistered classification
configs, such as `mobilenet_edgetpu_xs`, when training classification models.
When training a segmentation model, `EXPERIMENT_TYPE` has to be one of the
preregistered segmentation configs, such as
`seg_deeplabv3plus_mobilenet_edgetpuv2_s_ade20k` or `autoseg_edgetpu_xs`:
```
EXPERIMENT_NAME=xxx # Change this for your run, for example, 'mobilenet-edgetpu-test-run'
EXPERIMENT_TYPE=xxx # Change this for your run, for example, 'mobilenet_edgetpu_v2_xs'
$ python3 train.py \
--experiment_name=${EXPERIMENT_NAME} \
--experiment_type=${EXPERIMENT_TYPE} \
--mode=train_and_eval
```
### From training to quantized inference deployment
To export quantized tflite models using tensorflow post-training quantization:
**For classification models**:
```
$ python3 serving/export_tflite.py \
--model_name=${EXPERIMENT_TYPE} \
--ckpt_path=${CHECKPOINT} \
--dataset_dir=/path/to/calibration/dataset \
--output_dir=/tmp \
--quantize \
--image_size=224
```
Note that the `EXPERIMENT_TYPE` has to be in one of the preregistered
classification configs, such as `mobilenet_edgetpu_xs`.
**For segmentation models**:
```
$ python3 serving/export_tflite.py \
--model_name=${EXPERIMENT_TYPE} \
--ckpt_path=${CHECKPOINT} \
--dataset_dir=/path/to/calibration/dataset \
--output_dir=/tmp \
--quantize \
--quantize_less_restrictive \
--image_size=512 \
--finalize_method=${ARGMAX_FUSION}
```
`EXPERIMENT_TYPE` has to be one of the preregistered segmentation configs,
such as `deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32`.
`ARGMAX_FUSION` has to be one of the following:
- `resize512,argmax`: Argmax applied after scaling the output to 512x512.
- `resize256,argmax,resize512,squeeze`: Scale the output to 256x256, apply
argmax, then scale to 512x512 using nearest-neighbor upsampling.
- `resize128,argmax,resize512,squeeze`: Scale the output to 128x128, apply
argmax, then scale to 512x512 using nearest-neighbor upsampling.
### On-device benchmarking
The models in this repository are compatible with NNAPI and can be benchmarked
on Pixel 6 devices using the
[tflite benchmark tool](https://www.tensorflow.org/lite/performance/measurement).
While using the benchmark tool, enable NNAPI by setting the `use_nnapi`
command-line argument to `true` and specifying `nnapi_accelerator_name` as
`google-edgetpu`:
```
$ bazel build -c opt --config=android_arm64 tensorflow/lite/tools/benchmark:benchmark_model
# Push binary to device
$ adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp
# Push model to device
$ adb push /path/to/model.tflite /data/local/tmp/
# Run on-device benchmarking
$ adb shell /data/local/tmp/benchmark_model --graph=/data/local/tmp/model.tflite --use_nnapi=true --nnapi_accelerator_name=google-edgetpu
```
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
# type: ignore
"""Configuration definitions for MobilenetEdgeTPU losses, learning rates, optimizers, and training."""
import dataclasses
import os
from typing import Any, Mapping, Optional
# Import libraries
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import image_classification as base_config
@dataclasses.dataclass
class MobilenetEdgeTPUModelConfig(base_config.ImageClassificationModel):
"""Configuration for the MobilenetEdgeTPU model.
Attributes:
name: The name of the model. Defaults to 'MobilenetEdgeTPU'.
    model_params: A dictionary that represents the parameters of the
      MobilenetEdgeTPU model. These will be passed in to the "from_name"
      function.
"""
model_params: Mapping[str, Any] = dataclasses.field(
default_factory=lambda: { # pylint: disable=g-long-lambda
'model_name': 'mobilenet_edgetpu_v2_xs',
'model_weights_path': '',
'checkpoint_format': 'tf_checkpoint',
'overrides': {
'batch_norm': 'tpu',
'num_classes': 1001,
'rescale_input': False,
'dtype': 'bfloat16'
}
})
@dataclasses.dataclass
class MobilenetEdgeTPUTaskConfig(base_config.ImageClassificationTask):
"""Task defination for MobileNetEdgeTPU.
Attributes:
model: A `ModelConfig` instance.
saved_model_path: Instead of initializing a model from the model config,
the model can be loaded from a file path.
"""
model: MobilenetEdgeTPUModelConfig = MobilenetEdgeTPUModelConfig()
saved_model_path: Optional[str] = None
IMAGENET_TRAIN_EXAMPLES = 1281167
IMAGENET_VAL_EXAMPLES = 50000
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'
def mobilenet_edgetpu_base_experiment_config(
model_name: str) -> cfg.ExperimentConfig:
"""Image classification on imagenet with mobilenet_edgetpu.
Experiment config common across all mobilenet_edgetpu variants.
Args:
model_name: Name of the mobilenet_edgetpu model variant
Returns:
ExperimentConfig
"""
train_batch_size = 4096
eval_batch_size = 4096
steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
mobilenet_edgetpu_config = MobilenetEdgeTPUModelConfig(
num_classes=1001, input_size=[224, 224, 3])
mobilenet_edgetpu_config.model_params.model_name = model_name
config = cfg.ExperimentConfig(
task=MobilenetEdgeTPUTaskConfig(
model=mobilenet_edgetpu_config,
losses=base_config.Losses(label_smoothing=0.1),
train_data=base_config.DataConfig(
input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
is_training=True,
global_batch_size=train_batch_size,
dtype='bfloat16',
aug_type=common.Augmentation(type='autoaug')),
validation_data=base_config.DataConfig(
input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
is_training=False,
dtype='bfloat16',
drop_remainder=False,
global_batch_size=eval_batch_size)),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch * 5,
max_to_keep=10,
train_steps=550 * steps_per_epoch,
validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'rmsprop',
'rmsprop': {
'rho': 0.9,
'momentum': 0.9,
'epsilon': 0.001,
}
},
'ema': {
'average_decay': 0.99,
'trainable_weights_only': False,
},
'learning_rate': {
'type': 'exponential',
'exponential': {
'initial_learning_rate':
0.008 * (train_batch_size // 128),
'decay_steps':
int(2.4 * steps_per_epoch),
'decay_rate':
0.97,
'staircase':
True
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 5 * steps_per_epoch,
'warmup_learning_rate': 0
}
},
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
# Registration for MobileNet-EdgeTPU-Search models.
# When this config is used, users need to specify the saved model path via
# --params_override=task.saved_model_path='your/saved_model/path/'.
@exp_factory.register_config_factory('mobilenet_edgetpu_search')
def mobilenet_edgetpu_search() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_search')
# Registration for MobileNet-EdgeTPU-V2 models.
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_tiny')
def mobilenet_edgetpu_v2_tiny() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_tiny')
# Registration for MobileNet-EdgeTPU-V2 models.
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_xs')
def mobilenet_edgetpu_v2_xs() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_xs')
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_s')
def mobilenet_edgetpu_v2_s() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_s')
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_m')
def mobilenet_edgetpu_v2_m() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_m')
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_l')
def mobilenet_edgetpu_v2_l() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_l')
# Registration for MobileNet-EdgeTPU-V1 models.
@exp_factory.register_config_factory('mobilenet_edgetpu')
def mobilenet_edgetpu() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu')
# Registration for MobileNet-EdgeTPU-V1 models.
# We use 'depth_multiplier' to scale the models.
# E.g. dm1p25 implies depth multiplier of 1.25x
@exp_factory.register_config_factory('mobilenet_edgetpu_dm1p25')
def mobilenet_edgetpu_dm1p25() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_dm1p25')
@exp_factory.register_config_factory('mobilenet_edgetpu_dm1p5')
def mobilenet_edgetpu_dm1p5() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_dm1p5')
@exp_factory.register_config_factory('mobilenet_edgetpu_dm1p75')
def mobilenet_edgetpu_dm1p75() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_dm1p75')
# Registration for AutoSeg-EdgeTPU backbones
@exp_factory.register_config_factory('autoseg_edgetpu_backbone_xs')
def autoseg_edgetpu_backbone_xs() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('autoseg_edgetpu_backbone_xs')
@exp_factory.register_config_factory('autoseg_edgetpu_backbone_s')
def autoseg_edgetpu_backbone_s() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('autoseg_edgetpu_backbone_s')
@exp_factory.register_config_factory('autoseg_edgetpu_backbone_m')
def autoseg_edgetpu_backbone_m() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('autoseg_edgetpu_backbone_m')
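# Usage sketch (added for illustration; not part of the original file):
# registered experiments can be retrieved by name via the experiment factory.
# The override keys below are hypothetical examples.
def _example_lookup_classification_config():
  """Sketch only: fetches a registered classification experiment config."""
  config = exp_factory.get_exp_config('mobilenet_edgetpu_v2_s')
  # Hypothetical override: shorten training for a smoke test.
  config.override({'trainer': {'train_steps': 100}}, is_strict=False)
  return config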
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Semantic segmentation configuration definition.
The segmentation model is built using the mobilenet edgetpu v2 backbone and
deeplab v3 segmentation head.
"""
import dataclasses
import os
from typing import Optional
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import semantic_segmentation as base_cfg
from official.vision.beta.configs.google import backbones
@dataclasses.dataclass
class MobileNetEdgeTPU(hyperparams.Config):
"""MobileNetEdgeTPU config."""
model_id: str = 'mobilenet_edgetpu_v2_s'
freeze_large_filters: Optional[int] = None
pretrained_checkpoint_path: Optional[str] = None
@dataclasses.dataclass
class Backbone(backbones.Backbone):
"""Configuration for backbones.
Attributes:
type: 'str', type of backbone be used, on the of fields below.
spinenet_seg: spinenet-seg backbone config.
"""
type: Optional[str] = None
mobilenet_edgetpu: MobileNetEdgeTPU = MobileNetEdgeTPU()
@dataclasses.dataclass
class CustomSemanticSegmentationTaskConfig(base_cfg.SemanticSegmentationTask):
"""Same config for custom taks."""
model: Optional[base_cfg.SemanticSegmentationModel] = None
train_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=True)
validation_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=False)
evaluation: base_cfg.Evaluation = base_cfg.Evaluation()
# ADE 20K Dataset
ADE20K_TRAIN_EXAMPLES = 20210
ADE20K_VAL_EXAMPLES = 2000
ADE20K_INPUT_PATH_BASE = 'gs://**/ADE20K'
PRETRAINED_CKPT_PATH_BASE = 'gs://**/placeholder_for_edgetpu_models'
BACKBONE_PRETRAINED_CHECKPOINT = {
'mobilenet_edgetpu_v2_l':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_l/ckpt-171600',
'mobilenet_edgetpu_v2_m':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_m/ckpt-171600',
'mobilenet_edgetpu_v2_s':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_s/ckpt-171600',
'mobilenet_edgetpu_v2_xs':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_xs/ckpt-171600',
}
BACKBONE_HEADPOINT = {
'mobilenet_edgetpu_v2_l': 4,
'mobilenet_edgetpu_v2_m': 4,
'mobilenet_edgetpu_v2_s': 4,
'mobilenet_edgetpu_v2_xs': 4,
}
BACKBONE_LOWER_FEATURES = {
'mobilenet_edgetpu_v2_l': 3,
'mobilenet_edgetpu_v2_m': 3,
'mobilenet_edgetpu_v2_s': 3,
'mobilenet_edgetpu_v2_xs': 3,
}
def seg_deeplabv3plus_ade20k_32(backbone: str,
init_backbone: bool = True
) -> cfg.ExperimentConfig:
"""Semantic segmentation on ADE20K dataset with deeplabv3+."""
epochs = 200
train_batch_size = 128
eval_batch_size = 32
image_size = 512
steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
aspp_dilation_rates = [5, 10, 15]
pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
backbone] if init_backbone else None
config = cfg.ExperimentConfig(
task=CustomSemanticSegmentationTaskConfig(
model=base_cfg.SemanticSegmentationModel(
                # ADE20K uses only 32 semantic classes for training and
                # evaluation; the void (background) class is ignored in both.
num_classes=32,
input_size=[None, None, 3],
backbone=Backbone(
type='mobilenet_edgetpu',
mobilenet_edgetpu=MobileNetEdgeTPU(
model_id=backbone,
pretrained_checkpoint_path=pretrained_checkpoint_path,
freeze_large_filters=500,
)),
decoder=decoders.Decoder(
type='aspp',
aspp=decoders.ASPP(
level=BACKBONE_HEADPOINT[backbone],
use_depthwise_convolution=True,
dilation_rates=aspp_dilation_rates,
pool_kernel_size=[256, 256],
num_filters=128,
dropout_rate=0.3,
)),
head=base_cfg.SegmentationHead(
level=BACKBONE_HEADPOINT[backbone],
num_convs=2,
num_filters=256,
use_depthwise_convolution=True,
feature_fusion='deeplabv3plus',
low_level=BACKBONE_LOWER_FEATURES[backbone],
low_level_num_filters=48),
norm_activation=common.NormActivation(
activation='relu',
norm_momentum=0.99,
norm_epsilon=2e-3,
use_sync_bn=False)),
train_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
output_size=[image_size, image_size],
is_training=True,
global_batch_size=train_batch_size),
validation_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
output_size=[image_size, image_size],
is_training=False,
global_batch_size=eval_batch_size,
resize_eval_groundtruth=True,
drop_remainder=False),
evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
train_steps=epochs * steps_per_epoch,
validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'adam',
},
'learning_rate': {
'type': 'polynomial',
'polynomial': {
'initial_learning_rate': 0.0001,
'decay_steps': epochs * steps_per_epoch,
'end_learning_rate': 0.0,
'power': 0.9
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 4 * steps_per_epoch,
'warmup_learning_rate': 0
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
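# Note (added for illustration; not part of the original config): with
# `end_learning_rate=0.0`, the polynomial schedule above reduces to
#   lr(step) = 0.0001 * (1 - step / (epochs * steps_per_epoch)) ** 0.9
# applied after the 4-epoch linear warmup.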
def seg_deeplabv3plus_ade20k(backbone: str) -> cfg.ExperimentConfig:
  """Returns a 151-class ADE20K deeplabv3+ config for the given backbone."""
  config = seg_deeplabv3plus_ade20k_32(backbone)
config.task.model.num_classes = 151
config.trainer.optimizer_config.learning_rate.polynomial.power = 1.1
config.task.model.decoder.aspp.num_filters = 160
config.task.model.head.low_level_num_filters = 64
return config
# Experiment configs for 32 output classes
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32')
def deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32() -> cfg.ExperimentConfig:
return seg_deeplabv3plus_ade20k_32('mobilenet_edgetpu_v2_m')
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32')
def deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32() -> cfg.ExperimentConfig:
return seg_deeplabv3plus_ade20k_32('mobilenet_edgetpu_v2_s')
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32')
def deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32() -> cfg.ExperimentConfig:
return seg_deeplabv3plus_ade20k_32('mobilenet_edgetpu_v2_xs')
# Experiment configs for 151 output classes
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_m_ade20k')
def deeplabv3plus_mobilenet_edgetpuv2_m_ade20k() -> cfg.ExperimentConfig:
config = seg_deeplabv3plus_ade20k('mobilenet_edgetpu_v2_m')
return config
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k')
def deeplabv3plus_mobilenet_edgetpuv2_s_ade20k() -> cfg.ExperimentConfig:
config = seg_deeplabv3plus_ade20k('mobilenet_edgetpu_v2_s')
return config
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k')
def deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k() -> cfg.ExperimentConfig:
config = seg_deeplabv3plus_ade20k('mobilenet_edgetpu_v2_xs')
return config
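# Usage sketch (added for illustration; not part of the original file): the
# registered ADE20K experiments can be fetched and overridden by name. The
# override key below is a hypothetical example.
def _example_lookup_ade20k_config():
  """Sketch only: fetches a registered ADE20K segmentation experiment."""
  config = exp_factory.get_exp_config(
      'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k')
  # Hypothetical override: evaluate with a smaller batch size.
  config.override({'task': {'validation_data': {'global_batch_size': 16}}},
                  is_strict=False)
  return config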
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
# type: ignore
"""Semantic segmentation configuration definition for AutoML built models."""
import dataclasses
import os
from typing import Any, List, Optional, Mapping
# Import libraries
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import semantic_segmentation as base_cfg
from official.vision.beta.configs.google import backbones
# ADE 20K Dataset
ADE20K_TRAIN_EXAMPLES = 20210
ADE20K_VAL_EXAMPLES = 2000
ADE20K_INPUT_PATH_BASE = 'gs://**/ADE20K'
PRETRAINED_CKPT_PATH_BASE = 'gs://**/placeholder_for_edgetpu_models/pretrained_checkpoints'
BACKBONE_PRETRAINED_CHECKPOINT = {
'autoseg_edgetpu_backbone_xs':
PRETRAINED_CKPT_PATH_BASE +
'/autoseg_edgetpu_backbone_xs/ckpt-171600',
'autoseg_edgetpu_backbone_s':
PRETRAINED_CKPT_PATH_BASE +
'/autoseg_edgetpu_backbone_s/ckpt-171600',
'autoseg_edgetpu_backbone_m':
PRETRAINED_CKPT_PATH_BASE +
'/autoseg_edgetpu_backbone_m/ckpt-171600',
}
@dataclasses.dataclass
class BiFPNHeadConfig(hyperparams.Config):
"""BiFPN-based segmentation head config."""
min_level: int = 3
max_level: int = 8
fpn_num_filters: int = 96
@dataclasses.dataclass
class Losses(hyperparams.Config):
label_smoothing: float = 0.0
ignore_label: int = 255
class_weights: List[float] = dataclasses.field(default_factory=list)
l2_weight_decay: float = 0.0
use_groundtruth_dimension: bool = True
top_k_percent_pixels: float = 1.0
@dataclasses.dataclass
class AutosegEdgeTPUModelConfig(hyperparams.Config):
"""Autoseg-EdgeTPU segmentation model config."""
num_classes: int = 0
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone()
head: BiFPNHeadConfig = BiFPNHeadConfig()
model_params: Mapping[str, Any] = dataclasses.field(
default_factory=lambda: { # pylint: disable=g-long-lambda
'model_name': 'autoseg_edgetpu_backbone_s',
'checkpoint_format': 'tf_checkpoint',
'overrides': {
'batch_norm': 'tpu',
'rescale_input': False,
'backbone_only': True,
'resolution': 512
}
})
@dataclasses.dataclass
class AutosegEdgeTPUTaskConfig(base_cfg.SemanticSegmentationTask):
"""The task config inherited from the base segmentation task."""
model: AutosegEdgeTPUModelConfig = AutosegEdgeTPUModelConfig()
train_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=True)
validation_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=False)
losses: Losses = Losses()
init_checkpoint: Optional[str] = None
init_checkpoint_modules: str = 'backbone' # all or backbone
model_output_keys: Optional[List[int]] = dataclasses.field(
default_factory=list)
def autoseg_edgetpu_experiment_config(backbone_name: str,
init_backbone: bool = True
) -> cfg.ExperimentConfig:
"""Experiment using the semantic segmenatation searched model.
Args:
backbone_name: Name of the backbone used for this model
init_backbone: Whether to initialize backbone from a pretrained checkpoint
Returns:
ExperimentConfig
"""
epochs = 300
train_batch_size = 64
eval_batch_size = 32
image_size = 512
steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
train_steps = epochs * steps_per_epoch
model_config = AutosegEdgeTPUModelConfig(
num_classes=32, input_size=[image_size, image_size, 3])
model_config.model_params.model_name = backbone_name
if init_backbone:
model_config.model_params.model_weights_path = (
BACKBONE_PRETRAINED_CHECKPOINT[backbone_name])
model_config.model_params.overrides.resolution = image_size
config = cfg.ExperimentConfig(
task=AutosegEdgeTPUTaskConfig(
model=model_config,
train_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
output_size=[image_size, image_size],
is_training=True,
global_batch_size=train_batch_size,
aug_scale_min=0.5,
aug_scale_max=2.0),
validation_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
output_size=[image_size, image_size],
is_training=False,
resize_eval_groundtruth=True,
drop_remainder=True,
global_batch_size=eval_batch_size),
evaluation=base_cfg.Evaluation(report_train_mean_iou=False)),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch * 5,
max_to_keep=10,
train_steps=train_steps,
validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'sgd',
'sgd': {
'nesterov': True,
'momentum': 0.9,
}
},
'ema': {
'average_decay': 0.9998,
'trainable_weights_only': False,
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.12,
'decay_steps': train_steps
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 5 * steps_per_epoch,
'warmup_learning_rate': 0
}
},
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
# Registration for searched segmentation models.
@exp_factory.register_config_factory('autoseg_edgetpu_xs')
def autoseg_edgetpu_xs() -> cfg.ExperimentConfig:
return autoseg_edgetpu_experiment_config('autoseg_edgetpu_backbone_xs')
@exp_factory.register_config_factory('autoseg_edgetpu_s')
def autoseg_edgetpu_s() -> cfg.ExperimentConfig:
return autoseg_edgetpu_experiment_config('autoseg_edgetpu_backbone_s')
@exp_factory.register_config_factory('autoseg_edgetpu_m')
def autoseg_edgetpu_m() -> cfg.ExperimentConfig:
return autoseg_edgetpu_experiment_config('autoseg_edgetpu_backbone_m')
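# Usage sketch (added for illustration; not part of the original file):
# `model_params` overrides, such as the input resolution, can be applied after
# fetching a registered config. The resolution value here is hypothetical.
def _example_lookup_autoseg_config():
  """Sketch only: fetches and adjusts a searched segmentation config."""
  config = exp_factory.get_exp_config('autoseg_edgetpu_s')
  config.task.model.model_params.overrides.resolution = 384
  return config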
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classification decoder and parser."""
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.ops import preprocess_ops
MEAN_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
STDDEV_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
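# Note (added comment): with mean and stddev both equal to 127.5, the
# normalization below maps uint8 pixel values from [0, 255] to roughly
# [-1, 1].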
def random_crop_image(image,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.05, 1.0),
max_attempts=100):
"""Randomly crop an arbitrary shaped slice from the input image.
Args:
image: a Tensor of shape [height, width, 3] representing the input image.
aspect_ratio_range: a list of floats. The cropped area of the image must
have an aspect ratio = width / height within this range.
area_range: a list of floats. The cropped reas of the image must contain
a fraction of the input image within this range.
max_attempts: the number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return
the entire image.
Returns:
cropped_image: a Tensor representing the random cropped image. Can be the
original image if max_attempts is exhausted.
"""
with tf.name_scope('random_crop_image'):
crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
tf.shape(image),
tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
min_object_covered=0.1,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
cropped_image = tf.slice(image, crop_offset, crop_size)
return cropped_image
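# Minimal usage sketch (added; names and values are illustrative only):
def _example_random_crop():
  """Sketch only: applies `random_crop_image` to a synthetic image."""
  image = tf.zeros([224, 224, 3], dtype=tf.uint8)
  # Keep at least half of the image area in the crop.
  return random_crop_image(image, area_range=(0.5, 1.0))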
def random_crop_image_v2(image_bytes,
image_shape,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.05, 1.0),
max_attempts=100):
"""Randomly crop an arbitrary shaped slice from the input image.
This is a faster version of `random_crop_image` which takes the original
image bytes and image size as the inputs, and partially decode the JPEG
bytes according to the generated crop.
Args:
image_bytes: a Tensor of type string representing the raw image bytes.
image_shape: a Tensor specifying the shape of the raw image.
aspect_ratio_range: a list of floats. The cropped area of the image must
have an aspect ratio = width / height within this range.
area_range: a list of floats. The cropped reas of the image must contain
a fraction of the input image within this range.
max_attempts: the number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return
the entire image.
Returns:
cropped_image: a Tensor representing the random cropped image. Can be the
original image if max_attempts is exhausted.
"""
with tf.name_scope('random_crop_image_v2'):
crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
image_shape,
tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
min_object_covered=0.1,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
offset_y, offset_x, _ = tf.unstack(crop_offset)
crop_height, crop_width, _ = tf.unstack(crop_size)
crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width])
cropped_image = tf.image.decode_and_crop_jpeg(
image_bytes, crop_window, channels=3)
return cropped_image
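# Minimal usage sketch (added; illustrative only): exercises the fast
# decode-and-crop path on synthetic JPEG bytes.
def _example_random_crop_v2():
  """Sketch only: applies `random_crop_image_v2` to encoded bytes."""
  image_bytes = tf.io.encode_jpeg(tf.zeros([224, 224, 3], dtype=tf.uint8))
  image_shape = tf.image.extract_jpeg_shape(image_bytes)
  return random_crop_image_v2(image_bytes, image_shape)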
class Decoder(classification_input.Decoder):
"""A tf.Example decoder for classification task."""
pass
class Parser(classification_input.Parser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def _parse_train_image(self, decoded_tensors):
"""Parses image data for training."""
image_bytes = decoded_tensors[self._image_field_key]
if self._decode_jpeg_only:
image_shape = tf.image.extract_jpeg_shape(image_bytes)
# Crops image.
cropped_image = random_crop_image_v2(
image_bytes, image_shape)
image = tf.cond(
tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
lambda: cropped_image)
else:
# Decodes image.
image = tf.io.decode_image(image_bytes, channels=3)
image.set_shape([None, None, 3])
# Crops image.
cropped_image = random_crop_image(image)
image = tf.cond(
tf.reduce_all(tf.equal(tf.shape(cropped_image), tf.shape(image))),
lambda: preprocess_ops.center_crop_image(image),
lambda: cropped_image)
if self._aug_rand_hflip:
image = tf.image.random_flip_left_right(image)
# Resizes image.
image = tf.image.resize(
image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Applies autoaug or randaug.
if self._augmenter is not None:
image = self._augmenter.distort(image)
# Normalizes image with mean and std pixel values.
image = preprocess_ops.normalize_image(image,
offset=MEAN_RGB,
scale=STDDEV_RGB)
    # Converts image to self._dtype.
image = tf.image.convert_image_dtype(image, self._dtype)
return image
def _parse_eval_image(self, decoded_tensors):
"""Parses image data for evaluation."""
image_bytes = decoded_tensors[self._image_field_key]
if self._decode_jpeg_only:
image_shape = tf.image.extract_jpeg_shape(image_bytes)
# Center crops.
image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)
else:
# Decodes image.
image = tf.io.decode_image(image_bytes, channels=3)
image.set_shape([None, None, 3])
# Center crops.
image = preprocess_ops.center_crop_image(image)
image = tf.image.resize(
image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
# Normalizes image with mean and std pixel values.
image = preprocess_ops.normalize_image(image,
offset=MEAN_RGB,
scale=STDDEV_RGB)
    # Converts image to self._dtype.
image = tf.image.convert_image_dtype(image, self._dtype)
return image
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests classification_input.py."""
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.dataloaders import classification_input
from official.vision.beta.configs import common
from official.vision.beta.dataloaders import tfexample_utils
IMAGE_FIELD_KEY = 'image/encoded'
LABEL_FIELD_KEY = 'image/class/label'
class DecoderTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
(100, 100, 0),
(100, 100, 1),
(100, 100, 2),
)
def test_decoder(self, image_height, image_width, num_instances):
decoder = classification_input.Decoder(
image_field_key=IMAGE_FIELD_KEY, label_field_key=LABEL_FIELD_KEY)
serialized_example = tfexample_utils.create_classification_example(
image_height, image_width)
decoded_tensors = decoder.decode(tf.convert_to_tensor(serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertCountEqual([IMAGE_FIELD_KEY, LABEL_FIELD_KEY], results.keys())
self.assertEqual(0, results[LABEL_FIELD_KEY])
class ParserTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
([224, 224, 3], 'float32', True, 'autoaug', False, True, 'JPEG'),
([224, 224, 3], 'float16', True, 'randaug', False, False, 'PNG'),
([224, 224, 3], 'float32', False, None, False, True, 'JPEG'),
([224, 224, 3], 'float16', False, None, False, False, 'PNG'),
([512, 640, 3], 'float32', True, 'randaug', False, False, 'JPEG'),
([512, 640, 3], 'float16', True, 'autoaug', False, False, 'PNG'),
([512, 640, 3], 'float32', False, None, False, True, 'JPEG'),
([512, 640, 3], 'float16', False, None, False, False, 'PNG'),
([640, 640, 3], 'float32', True, None, False, False, 'JPEG'),
([640, 640, 3], 'bfloat16', True, None, False, False, 'PNG'),
([640, 640, 3], 'float32', False, None, False, False, 'JPEG'),
([640, 640, 3], 'bfloat16', False, None, False, False, 'PNG'),
([224, 224, 3], 'float32', True, 'autoaug', True, True, 'JPEG'),
([224, 224, 3], 'float16', True, 'randaug', True, False, 'PNG'),
)
def test_parser(self, output_size, dtype, is_training, aug_name,
is_multilabel, decode_jpeg_only, image_format):
serialized_example = tfexample_utils.create_classification_example(
output_size[0], output_size[1], image_format, is_multilabel)
if aug_name == 'randaug':
aug_type = common.Augmentation(
type=aug_name, randaug=common.RandAugment(magnitude=10))
elif aug_name == 'autoaug':
aug_type = common.Augmentation(
type=aug_name, autoaug=common.AutoAugment(augmentation_name='test'))
else:
aug_type = None
decoder = classification_input.Decoder(
image_field_key=IMAGE_FIELD_KEY, label_field_key=LABEL_FIELD_KEY,
is_multilabel=is_multilabel)
parser = classification_input.Parser(
output_size=output_size[:2],
num_classes=10,
image_field_key=IMAGE_FIELD_KEY,
label_field_key=LABEL_FIELD_KEY,
is_multilabel=is_multilabel,
decode_jpeg_only=decode_jpeg_only,
aug_rand_hflip=False,
aug_type=aug_type,
dtype=dtype)
decoded_tensors = decoder.decode(serialized_example)
image, label = parser.parse_fn(is_training)(decoded_tensors)
self.assertAllEqual(image.numpy().shape, output_size)
if not is_multilabel:
self.assertAllEqual(label, 0)
else:
self.assertAllEqual(label.numpy().shape, [10])
if dtype == 'float32':
self.assertAllEqual(image.dtype, tf.float32)
elif dtype == 'float16':
self.assertAllEqual(image.dtype, tf.float16)
elif dtype == 'bfloat16':
self.assertAllEqual(image.dtype, tf.bfloat16)
if __name__ == '__main__':
tf.test.main()