Commit 9114f2a3 authored by A. Unique TensorFlower, committed by saberkun

Internal change

PiperOrigin-RevId: 404080616
parent ec0d7d0b
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build MobileBERT-EdgeTPU model."""
from typing import Optional
import tensorflow as tf
from official.modeling import tf_utils
from official.nlp import modeling
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import encoder as edgetpu_encoder
from official.projects.edgetpu.nlp.modeling import pretrainer as edgetpu_pretrainer
def build_bert_pretrainer(pretrainer_cfg: params.PretrainerModelParams,
encoder: Optional[tf.keras.Model] = None,
masked_lm: Optional[tf.keras.Model] = None,
                          quantization_friendly: bool = False,
name: Optional[str] = None) -> tf.keras.Model:
"""Builds pretrainer.
Args:
pretrainer_cfg: configs for the pretrainer model.
encoder: (Optional) The encoder network for the pretrainer model.
masked_lm: (Optional) The masked_lm network for the pretrainer model.
    quantization_friendly: (Optional) If enabled, the model will use the
      EdgeTPU mobilebert transformer. The difference is a customized softmax
      op that uses -120 as the mask value, which is more stable for
      post-training quantization.
name: (Optional) Name of the pretrainer model.
Returns:
The pretrainer model.
"""
encoder_cfg = pretrainer_cfg.encoder.mobilebert
encoder = encoder or edgetpu_encoder.MobileBERTEncoder(
word_vocab_size=encoder_cfg.word_vocab_size,
word_embed_size=encoder_cfg.word_embed_size,
type_vocab_size=encoder_cfg.type_vocab_size,
max_sequence_length=encoder_cfg.max_sequence_length,
num_blocks=encoder_cfg.num_blocks,
hidden_size=encoder_cfg.hidden_size,
num_attention_heads=encoder_cfg.num_attention_heads,
intermediate_size=encoder_cfg.intermediate_size,
intermediate_act_fn=encoder_cfg.hidden_activation,
hidden_dropout_prob=encoder_cfg.hidden_dropout_prob,
attention_probs_dropout_prob=encoder_cfg.attention_probs_dropout_prob,
intra_bottleneck_size=encoder_cfg.intra_bottleneck_size,
initializer_range=encoder_cfg.initializer_range,
use_bottleneck_attention=encoder_cfg.use_bottleneck_attention,
key_query_shared_bottleneck=encoder_cfg.key_query_shared_bottleneck,
num_feedforward_networks=encoder_cfg.num_feedforward_networks,
normalization_type=encoder_cfg.normalization_type,
classifier_activation=encoder_cfg.classifier_activation,
input_mask_dtype=encoder_cfg.input_mask_dtype,
quantization_friendly=quantization_friendly)
if pretrainer_cfg.cls_heads:
cls_heads = [
modeling.layers.ClassificationHead(**cfg.as_dict())
for cfg in pretrainer_cfg.cls_heads
]
else:
cls_heads = []
# Get the embedding table from the encoder model.
def _get_embedding_table(encoder):
for layer in encoder.layers:
if layer.name.startswith('mobile_bert_embedding'):
return layer.word_embedding.embeddings
    raise ValueError('Cannot find the embedding layer in the encoder.')
masked_lm = masked_lm or modeling.layers.MobileBertMaskedLM(
embedding_table=_get_embedding_table(encoder),
activation=tf_utils.get_activation(pretrainer_cfg.mlm_activation),
initializer=tf.keras.initializers.TruncatedNormal(
stddev=pretrainer_cfg.mlm_initializer_range),
name='cls/predictions')
pretrainer = edgetpu_pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=encoder,
classification_heads=cls_heads,
customized_masked_lm=masked_lm,
name=name)
return pretrainer
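
# A minimal usage sketch (mirrors the unit test below; assumes the default
# MobileBERT encoder config from `official.nlp.configs.encoders`):
#
#   from official.nlp.configs import encoders
#   cfg = params.PretrainerModelParams(
#       encoder=encoders.EncoderConfig(type='mobilebert'))
#   pretrainer = build_bert_pretrainer(cfg, quantization_friendly=True)
#   _ = pretrainer(pretrainer.inputs)  # Forces variable creation.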
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilebert_edgetpu.model_builder.py."""
import tensorflow as tf
from official.nlp import modeling
from official.nlp.configs import encoders
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
class ModelBuilderTest(tf.test.TestCase):
def setUp(self):
super(ModelBuilderTest, self).setUp()
self.pretrainer_config = params.PretrainerModelParams(
encoder=encoders.EncoderConfig(type='mobilebert'))
def test_default_initialization(self):
"""Initializes pretrainer model from stratch."""
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=self.pretrainer_config,
name='test_model')
# Makes sure the pretrainer variables are created.
_ = pretrainer(pretrainer.inputs)
self.assertEqual(pretrainer.name, 'test_model')
encoder = pretrainer.encoder_network
default_number_layer = encoders.MobileBertEncoderConfig().num_blocks
encoder_transformer_layer_counter = 0
for layer in encoder.layers:
if isinstance(layer, modeling.layers.MobileBertTransformer):
encoder_transformer_layer_counter += 1
self.assertEqual(default_number_layer, encoder_transformer_layer_counter)
def test_initialization_with_encoder(self):
"""Initializes pretrainer model with an existing encoder network."""
encoder = encoders.build_encoder(
config=encoders.EncoderConfig(type='mobilebert'))
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=self.pretrainer_config,
encoder=encoder)
encoder_network = pretrainer.encoder_network
self.assertEqual(encoder_network, encoder)
def test_initialization_with_mlm(self):
"""Initializes pretrainer model with an existing MLM head."""
embedding = modeling.layers.MobileBertEmbedding(
word_vocab_size=30522,
word_embed_size=128,
type_vocab_size=2,
output_embed_size=encoders.MobileBertEncoderConfig().hidden_size)
dummy_input = tf.keras.layers.Input(
shape=(None,), dtype=tf.int32)
_ = embedding(dummy_input)
embedding_table = embedding.word_embedding.embeddings
mlm_layer = modeling.layers.MobileBertMaskedLM(
embedding_table=embedding_table)
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=self.pretrainer_config,
masked_lm=mlm_layer)
mlm_network = pretrainer.masked_lm
self.assertEqual(mlm_network, mlm_layer)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BERT Pre-training model."""
# pylint: disable=g-classes-have-attributes
import copy
from typing import List, Optional
import tensorflow as tf
from official.nlp.modeling import layers
@tf.keras.utils.register_keras_serializable(package='Text')
class MobileBERTEdgeTPUPretrainer(tf.keras.Model):
"""BERT pretraining model V2.
  Adds the masked language model head and optional classification heads on
  top of the transformer encoder.
Args:
encoder_network: A transformer network. This network should output a
sequence output and a classification output.
mlm_activation: The activation (if any) to use in the masked LM network. If
None, no activation will be used.
mlm_initializer: The initializer (if any) to use in the masked LM. Default
to a Glorot uniform initializer.
classification_heads: A list of optional head layers to transform on encoder
sequence outputs.
customized_masked_lm: A customized masked_lm layer. If None, will create
a standard layer from `layers.MaskedLM`; if not None, will use the
specified masked_lm layer. Above arguments `mlm_activation` and
`mlm_initializer` will be ignored.
name: The name of the model.
Inputs: Inputs defined by the encoder network, plus `masked_lm_positions` as a
dictionary.
  Outputs: A dictionary of `mlm_logits`, classification head outputs keyed by
    head names, and also outputs from `encoder_network`, keyed by
    `pooled_output`, `sequence_output` and `encoder_outputs` (if any).
"""
def __init__(
self,
encoder_network: tf.keras.Model,
mlm_activation=None,
mlm_initializer='glorot_uniform',
classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
customized_masked_lm: Optional[tf.keras.layers.Layer] = None,
name: str = 'bert',
**kwargs):
inputs = copy.copy(encoder_network.inputs)
outputs = {}
encoder_network_outputs = encoder_network(inputs)
if isinstance(encoder_network_outputs, list):
outputs['pooled_output'] = encoder_network_outputs[1]
if isinstance(encoder_network_outputs[0], list):
outputs['encoder_outputs'] = encoder_network_outputs[0]
outputs['sequence_output'] = encoder_network_outputs[0][-1]
else:
outputs['sequence_output'] = encoder_network_outputs[0]
elif isinstance(encoder_network_outputs, dict):
outputs = encoder_network_outputs
else:
raise ValueError('encoder_network\'s output should be either a list '
'or a dict, but got %s' % encoder_network_outputs)
masked_lm_positions = tf.keras.layers.Input(
shape=(None,), name='masked_lm_positions', dtype=tf.int32)
inputs.append(masked_lm_positions)
masked_lm_layer = customized_masked_lm or layers.MaskedLM(
embedding_table=encoder_network.get_embedding_table(),
activation=mlm_activation,
initializer=mlm_initializer,
name='cls/predictions')
sequence_output = outputs['sequence_output']
outputs['mlm_logits'] = masked_lm_layer(
sequence_output, masked_positions=masked_lm_positions)
classification_head_layers = classification_heads or []
for cls_head in classification_head_layers:
cls_outputs = cls_head(sequence_output)
if isinstance(cls_outputs, dict):
outputs.update(cls_outputs)
else:
outputs[cls_head.name] = cls_outputs
super(MobileBERTEdgeTPUPretrainer, self).__init__(
inputs=inputs,
outputs=outputs,
name=name,
**kwargs)
self._config = {
'encoder_network': encoder_network,
'mlm_activation': mlm_activation,
'mlm_initializer': mlm_initializer,
'classification_heads': classification_heads,
'customized_masked_lm': customized_masked_lm,
'name': name,
}
self.encoder_network = encoder_network
self.masked_lm = masked_lm_layer
self.classification_heads = classification_head_layers
@property
def checkpoint_items(self):
"""Returns a dictionary of items to be additionally checkpointed."""
items = dict(encoder=self.encoder_network, masked_lm=self.masked_lm)
for head in self.classification_heads:
for key, item in head.checkpoint_items.items():
items['.'.join([head.name, key])] = item
return items
def get_config(self):
return self._config
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
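
# A minimal usage sketch (mirrors the unit tests below; any encoder with
# compatible list/dict outputs can stand in for `networks.BertEncoder`):
#
#   from official.nlp.modeling import networks
#   encoder = networks.BertEncoder(vocab_size=100, num_layers=2)
#   model = MobileBERTEdgeTPUPretrainer(encoder_network=encoder)
#   # Inputs are the encoder's inputs plus a `masked_lm_positions` tensor.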
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for BERT pretrainer model."""
import itertools
from absl.testing import parameterized
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.projects.edgetpu.nlp.modeling import pretrainer
class MobileBERTEdgeTPUPretrainerTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(itertools.product([True, False],
[True, False],
[True, False]))
def test_mobilebert_edgetpu_pretrainer(
self,
dict_outputs,
return_all_encoder_outputs,
use_customized_masked_lm):
"""Validate that the Keras object can be created."""
# Build a transformer network to use within the BERT trainer.
vocab_size = 100
sequence_length = 512
hidden_size = 48
num_layers = 2
test_network = networks.BertEncoder(
vocab_size=vocab_size,
num_layers=num_layers,
hidden_size=hidden_size,
max_sequence_length=sequence_length,
return_all_encoder_outputs=return_all_encoder_outputs,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
if use_customized_masked_lm:
customized_masked_lm = layers.MaskedLM(
embedding_table=test_network.get_embedding_table())
else:
customized_masked_lm = None
bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=test_network, customized_masked_lm=customized_masked_lm)
num_token_predictions = 20
# Create a set of 2-dimensional inputs (the first dimension is implicit).
inputs = dict(
input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32))
inputs['masked_lm_positions'] = tf.keras.Input(
shape=(num_token_predictions,), dtype=tf.int32)
# Invoke the trainer model on the inputs. This causes the layer to be built.
outputs = bert_trainer_model(inputs)
has_encoder_outputs = dict_outputs or return_all_encoder_outputs
expected_keys = ['sequence_output', 'pooled_output']
if has_encoder_outputs:
expected_keys.append('encoder_outputs')
expected_keys.append('mlm_logits')
self.assertSameElements(outputs.keys(), expected_keys)
# Validate that the outputs are of the expected shape.
expected_lm_shape = [None, num_token_predictions, vocab_size]
self.assertAllEqual(expected_lm_shape,
outputs['mlm_logits'].shape.as_list())
expected_sequence_output_shape = [None, sequence_length, hidden_size]
self.assertAllEqual(expected_sequence_output_shape,
outputs['sequence_output'].shape.as_list())
expected_pooled_output_shape = [None, hidden_size]
self.assertAllEqual(expected_pooled_output_shape,
outputs['pooled_output'].shape.as_list())
def test_multiple_cls_outputs(self):
"""Validate that the Keras object can be created."""
# Build a transformer network to use within the BERT trainer.
vocab_size = 100
sequence_length = 512
hidden_size = 48
num_layers = 2
test_network = networks.BertEncoder(
vocab_size=vocab_size,
num_layers=num_layers,
hidden_size=hidden_size,
max_sequence_length=sequence_length,
dict_outputs=True)
bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=test_network,
classification_heads=[layers.MultiClsHeads(
inner_dim=5, cls_list=[('foo', 2), ('bar', 3)])])
num_token_predictions = 20
# Create a set of 2-dimensional inputs (the first dimension is implicit).
inputs = dict(
input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
masked_lm_positions=tf.keras.Input(
shape=(num_token_predictions,), dtype=tf.int32))
# Invoke the trainer model on the inputs. This causes the layer to be built.
outputs = bert_trainer_model(inputs)
self.assertEqual(outputs['foo'].shape.as_list(), [None, 2])
self.assertEqual(outputs['bar'].shape.as_list(), [None, 3])
def test_v2_serialize_deserialize(self):
"""Validate that the BERT trainer can be serialized and deserialized."""
# Build a transformer network to use within the BERT trainer. (Here, we use
# a short sequence_length for convenience.)
test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
# Create a BERT trainer with the created network. (Note that all the args
# are different, so we can catch any serialization mismatches.)
bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer(
encoder_network=test_network)
# Create another BERT trainer via serialization and deserialization.
config = bert_trainer_model.get_config()
new_bert_trainer_model = pretrainer.MobileBERTEdgeTPUPretrainer.from_config(
config)
# Validate that the config can be forced to JSON.
_ = new_bert_trainer_model.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(bert_trainer_model.get_config(),
new_bert_trainer_model.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MobileBERT-EdgeTPU model runner."""
import os
from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf
from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.projects.edgetpu.nlp import mobilebert_edgetpu_trainer
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.utils import utils
FLAGS = flags.FLAGS
def main(_):
# Set up experiment params and load the configs from file/files.
experiment_params = params.EdgeTPUBERTCustomParams()
experiment_params = utils.config_override(experiment_params, FLAGS)
model_dir = utils.get_model_dir(experiment_params, FLAGS)
distribution_strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=experiment_params.runtime.distribution_strategy,
all_reduce_alg=experiment_params.runtime.all_reduce_alg,
num_gpus=experiment_params.runtime.num_gpus,
tpu_address=experiment_params.runtime.tpu_address)
with distribution_strategy.scope():
teacher_model = model_builder.build_bert_pretrainer(
pretrainer_cfg=experiment_params.teacher_model,
quantization_friendly=False,
name='teacher')
student_model = model_builder.build_bert_pretrainer(
pretrainer_cfg=experiment_params.student_model,
quantization_friendly=True,
name='student')
# Load model weights.
teacher_ckpt_dir_or_file = experiment_params.teacher_model_init_checkpoint
if not teacher_ckpt_dir_or_file:
raise ValueError('`teacher_model_init_checkpoint` is not specified.')
utils.load_checkpoint(teacher_model, teacher_ckpt_dir_or_file)
student_ckpt_dir_or_file = experiment_params.student_model_init_checkpoint
if not student_ckpt_dir_or_file:
# Makes sure the pretrainer variables are created.
_ = student_model(student_model.inputs)
    logging.warning('No student checkpoint is provided, training might take '
                    'much longer before converging.')
else:
utils.load_checkpoint(student_model, student_ckpt_dir_or_file)
runner = mobilebert_edgetpu_trainer.MobileBERTEdgeTPUDistillationTrainer(
teacher_model=teacher_model,
student_model=student_model,
strategy=distribution_strategy,
experiment_params=experiment_params,
export_ckpt_path=model_dir)
# Save checkpoint for preemption handling.
# Checkpoint for downstreaming tasks are saved separately inside the
# runner's train_loop_end() function.
checkpoint = tf.train.Checkpoint(
teacher_model=runner.teacher_model,
student_model=runner.student_model,
layer_wise_optimizer=runner.layer_wise_optimizer,
e2e_optimizer=runner.e2e_optimizer,
current_step=runner.current_step)
checkpoint_manager = tf.train.CheckpointManager(
checkpoint,
directory=model_dir,
max_to_keep=5,
step_counter=runner.current_step,
checkpoint_interval=20000,
init_fn=None)
controller = orbit.Controller(
trainer=runner,
evaluator=runner,
global_step=runner.current_step,
strategy=distribution_strategy,
steps_per_loop=experiment_params.orbit_config.steps_per_loop,
summary_dir=os.path.join(model_dir, 'train'),
eval_summary_dir=os.path.join(model_dir, 'eval'),
checkpoint_manager=checkpoint_manager)
if FLAGS.mode == 'train':
controller.train(steps=experiment_params.orbit_config.total_steps)
else:
    raise ValueError('Unsupported mode, only `train` is supported.')
if __name__ == '__main__':
tfm_flags.define_flags()
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
r"""Export tflite for MobileBERT-EdgeTPU with SQUAD head.
Example usage:
python3 export_tflite_squad.py \
--config_file=third_party/tensorflow_models/official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_xs.yaml \
--export_path=/tmp/ \
--quantization_method=full-integer
"""
# pylint: enable=line-too-long
import os
import tempfile
from typing import Sequence
from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf
from official.common import flags as tfm_flags
from official.nlp.data import data_loader_factory
from official.nlp.data import question_answering_dataloader
from official.nlp.modeling import models
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.utils import utils
FLAGS = flags.FLAGS
SQUAD_TRAIN_SPLIT = 'gs://**/tp/bert/squad_v1.1/train.tf_record'
flags.DEFINE_string('export_path', '/tmp/',
'File path to store tflite model.')
flags.DEFINE_enum('quantization_method', 'float',
['full-integer', 'hybrid', 'float'], 'Quantization method.')
flags.DEFINE_integer('batch_size', 1,
'Fixed batch size for exported TFLite model.')
flags.DEFINE_integer('sequence_length', 384,
'Fixed sequence length.')
flags.DEFINE_string('model_checkpoint', None,
                    'Checkpoint path for the model. Model will be initialized '
                    'with random weights if path is None.')
def build_model_for_serving(model: tf.keras.Model,
sequence_length: int = 384,
batch_size: int = 1) -> tf.keras.Model:
"""Builds MLPerf evaluation compatible models.
To run the model on device, the model input/output datatype and node names
need to match the MLPerf setup.
Args:
model: Input keras model.
sequence_length: BERT model sequence length.
batch_size: Inference batch size.
Returns:
Keras model with new input/output nodes.
"""
word_ids = tf.keras.Input(shape=(sequence_length,),
batch_size=batch_size,
dtype=tf.int32,
name='input_word_ids')
mask = tf.keras.Input(shape=(sequence_length,),
batch_size=batch_size,
dtype=tf.int32, name='input_mask')
type_ids = tf.keras.Input(shape=(sequence_length,),
batch_size=batch_size,
dtype=tf.int32, name='input_type_ids')
model_output = model([word_ids, type_ids, mask])
# Use identity layers wrapped in lambdas to explicitly name the output
# tensors.
start_logits = tf.keras.layers.Lambda(
tf.identity, name='start_positions')(
model_output[0])
end_logits = tf.keras.layers.Lambda(
tf.identity, name='end_positions')(
model_output[1])
model = tf.keras.Model(
inputs=[word_ids, type_ids, mask],
outputs=[start_logits, end_logits])
return model
def build_inputs(data_params, input_context=None):
"""Returns tf.data.Dataset for sentence_prediction task."""
return data_loader_factory.get_data_loader(data_params).load(input_context)
def main(argv: Sequence[str]) -> None:
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
# Set up experiment params and load the configs from file/files.
experiment_params = params.EdgeTPUBERTCustomParams()
experiment_params = utils.config_override(experiment_params, FLAGS)
  # Change the input mask type to tf.float32 to avoid an additional casting op.
  experiment_params.student_model.encoder.mobilebert.input_mask_dtype = 'float32'
  # Experiments indicate that using -120 as the softmax mask value is good
  # enough for both int8 and bfloat16, so we set quantization_friendly to True
  # for both the quantized and the float model.
pretrainer_model = model_builder.build_bert_pretrainer(
experiment_params.student_model,
name='pretrainer',
quantization_friendly=True)
encoder_network = pretrainer_model.encoder_network
model = models.BertSpanLabeler(
network=encoder_network,
initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))
# Load model weights.
if FLAGS.model_checkpoint is not None:
checkpoint_dict = {'model': model}
checkpoint = tf.train.Checkpoint(**checkpoint_dict)
checkpoint.restore(FLAGS.model_checkpoint).assert_existing_objects_matched()
model_for_serving = build_model_for_serving(model)
model_for_serving.summary()
# TODO(b/194449109): Need to save the model to file and then convert tflite
# with 'tf.lite.TFLiteConverter.from_saved_model()' to get the expected
# accuracy
  # Use mkdtemp() so the temporary directory is not deleted while still in use.
  tmp_dir = tempfile.mkdtemp()
model_for_serving.save(tmp_dir)
def _representative_dataset():
dataset_params = question_answering_dataloader.QADataConfig()
dataset_params.input_path = SQUAD_TRAIN_SPLIT
dataset_params.drop_remainder = False
dataset_params.global_batch_size = 1
dataset_params.is_training = True
dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
build_inputs, dataset_params)
for example in dataset.take(100):
inputs = example[0]
input_word_ids = inputs['input_word_ids']
input_mask = inputs['input_mask']
input_type_ids = inputs['input_type_ids']
yield [input_word_ids, input_mask, input_type_ids]
converter = tf.lite.TFLiteConverter.from_saved_model(tmp_dir)
if FLAGS.quantization_method in ['full-integer', 'hybrid']:
converter.optimizations = [tf.lite.Optimize.DEFAULT]
if FLAGS.quantization_method in ['full-integer']:
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.float32
converter.representative_dataset = _representative_dataset
tflite_quant_model = converter.convert()
export_model_path = os.path.join(FLAGS.export_path, 'model.tflite')
with tf.io.gfile.GFile(export_model_path, 'wb') as f:
f.write(tflite_quant_model)
  logging.info('Successfully saved the tflite model to %s', FLAGS.export_path)
if __name__ == '__main__':
tfm_flags.define_flags()
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for export_tflite_squad."""
import tensorflow as tf
from official.nlp.modeling import models
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.serving import export_tflite_squad
class ExportTfliteSquadTest(tf.test.TestCase):
def setUp(self):
super(ExportTfliteSquadTest, self).setUp()
experiment_params = params.EdgeTPUBERTCustomParams()
pretrainer_model = model_builder.build_bert_pretrainer(
experiment_params.student_model, name='pretrainer')
encoder_network = pretrainer_model.encoder_network
self.span_labeler = models.BertSpanLabeler(
network=encoder_network,
initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))
def test_model_input_output(self):
test_model = export_tflite_squad.build_model_for_serving(self.span_labeler)
# Test model input order, names, and shape.
self.assertEqual(test_model.input[0].name, 'input_word_ids')
self.assertEqual(test_model.input[1].name, 'input_type_ids')
self.assertEqual(test_model.input[2].name, 'input_mask')
self.assertEqual(test_model.input[0].shape, (1, 384))
self.assertEqual(test_model.input[1].shape, (1, 384))
self.assertEqual(test_model.input[2].shape, (1, 384))
# Test model output order, name, and shape.
self.assertEqual(test_model.output[0].name, 'start_positions/Identity:0')
self.assertEqual(test_model.output[1].name, 'end_positions/Identity:0')
self.assertEqual(test_model.output[0].shape, (1, 384))
self.assertEqual(test_model.output[1].shape, (1, 384))
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions."""
import os
import pprint
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.projects.edgetpu.nlp.configs import params
def serialize_config(experiment_params: params.EdgeTPUBERTCustomParams,
model_dir: str):
"""Serializes and saves the experiment config."""
params_save_path = os.path.join(model_dir, 'params.yaml')
logging.info('Saving experiment configuration to %s', params_save_path)
tf.io.gfile.makedirs(model_dir)
hyperparams.save_params_dict_to_yaml(experiment_params, params_save_path)
# Note: Do not call this utility function unless you load the `flags`
# module in your script.
def config_override(experiment_params, flags_obj):
"""Overrides ExperimentConfig according to flags."""
if not hasattr(flags_obj, 'tpu'):
raise ModuleNotFoundError(
'`tpu` is not found in FLAGS. Need to load flags.py first.')
# Change runtime.tpu to the real tpu.
experiment_params.override({
'runtime': {
'tpu_address': flags_obj.tpu,
}
})
# Get the first level of override from `--config_file`.
# `--config_file` is typically used as a template that specifies the common
# override for a particular experiment.
for config_file in flags_obj.config_file or []:
experiment_params = hyperparams.override_params_dict(
experiment_params, config_file, is_strict=True)
# Get the second level of override from `--params_override`.
# `--params_override` is typically used as a further override over the
# template. For example, one may define a particular template for training
# ResNet50 on ImageNet in a config file and pass it via `--config_file`,
# then define different learning rates and pass it via `--params_override`.
if flags_obj.params_override:
experiment_params = hyperparams.override_params_dict(
experiment_params, flags_obj.params_override, is_strict=True)
experiment_params.validate()
experiment_params.lock()
pp = pprint.PrettyPrinter()
logging.info('Final experiment parameters: %s',
pp.pformat(experiment_params.as_dict()))
model_dir = get_model_dir(experiment_params, flags_obj)
if flags_obj.mode is not None:
if 'train' in flags_obj.mode:
# Pure eval modes do not output yaml files. Otherwise continuous eval job
# may race against the train job for writing the same file.
serialize_config(experiment_params, model_dir)
return experiment_params
def get_model_dir(experiment_params, flags_obj):
"""Gets model dir from Flags."""
del experiment_params
return flags_obj.model_dir
def load_checkpoint(model: tf.keras.Model, ckpt_path: str):
"""Initializes model with the checkpoint."""
ckpt_dir_or_file = ckpt_path
if tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
# Makes sure the pretrainer variables are created.
_ = model(model.inputs)
checkpoint = tf.train.Checkpoint(
**model.checkpoint_items)
checkpoint.read(ckpt_dir_or_file).expect_partial()
  logging.info('Successfully loaded parameters for %s model', model.name)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for utils.py."""
from absl import flags
import tensorflow as tf
import yaml
from official.projects.edgetpu.nlp.configs import params
from official.projects.edgetpu.nlp.modeling import model_builder
from official.projects.edgetpu.nlp.utils import utils
FLAGS = flags.FLAGS
# Helper function to compare two nested dicts.
# Note that this function only ensures that every field in dict_a is defined
# in dict_b with the same value. It does not guarantee that
# dict_a == dict_b.
def nested_dict_compare(dict_a, dict_b):
for k, v in sorted(dict_a.items()):
if k not in dict_b:
return False
if isinstance(v, dict) and isinstance(dict_b[k], dict):
if not nested_dict_compare(dict_a[k], dict_b[k]):
return False
else:
# A caveat: When dict_a[k] = 1, dict_b[k] = True, the return is True.
if dict_a[k] != dict_b[k]:
return False
return True
class UtilsTest(tf.test.TestCase):
def test_config_override(self):
    # Define several dummy flags which are called by the utils.config_override
    # function.
file_path = 'third_party/tensorflow_models/official/projects/edgetpu/nlp/experiments/mobilebert_edgetpu_m.yaml'
flags.DEFINE_string('tpu', None, 'tpu_address.')
flags.DEFINE_list('config_file', [file_path],
'A list of config files path.')
flags.DEFINE_string('params_override', None, 'Override params.')
flags.DEFINE_string('model_dir', '/tmp/', 'Model saving directory.')
flags.DEFINE_list('mode', ['train'], 'Job mode.')
flags.DEFINE_bool('use_vizier', False,
'Whether to enable vizier based hyperparameter search.')
experiment_params = params.EdgeTPUBERTCustomParams()
experiment_params = utils.config_override(experiment_params, FLAGS)
experiment_params_dict = experiment_params.as_dict()
with tf.io.gfile.GFile(file_path, 'r') as f:
loaded_dict = yaml.load(f, Loader=yaml.FullLoader)
    # experiment_params contains all the configs, but loaded_dict might
    # contain only a subset of them.
self.assertTrue(nested_dict_compare(loaded_dict, experiment_params_dict))
def test_load_checkpoint(self):
"""Test the pretrained model can be successfully loaded."""
experiment_params = params.EdgeTPUBERTCustomParams()
student_pretrainer = experiment_params.student_model
student_pretrainer.encoder.type = 'mobilebert'
pretrainer = model_builder.build_bert_pretrainer(
pretrainer_cfg=student_pretrainer,
name='test_model')
# Makes sure the pretrainer variables are created.
checkpoint_path = self.create_tempfile().full_path
_ = pretrainer(pretrainer.inputs)
pretrainer.save_weights(checkpoint_path)
utils.load_checkpoint(pretrainer, checkpoint_path)
if __name__ == '__main__':
tf.test.main()
# EdgeTPU-optimized Vision Models
## Overview
This project includes computer vision models optimized for the Edge TPU
featured in Pixel phones, Coral products, and more. These models significantly
improve the latency- and energy-vs-accuracy pareto-frontier compared to
existing SOTA models when running on Edge TPU devices.
## MobileNet-EdgeTPU Classification Models
### Introduction
We are presenting a family of computer vision models based on MobileNetEdgeTPUV2
that are optimized for the next generation Edge TPU ML accelerator in the Google
Tensor SoC that powers the Pixel 6 phones. These models improve the
latency-accuracy pareto-frontier compared to the existing SOTA on-device models
including their predecessor MobileNetEdgeTPUs. MobileNetEdgeTPUV2 can be used as
a standalone image classification model or as a backbone for other computer
vision tasks such as object detection or semantic segmentation.
### Search space design
During the design of MobileNetEdgeTPUV2 we crafted a neural network search
space that includes building blocks which run efficiently on the Edge TPU
accelerator while providing better algorithmic qualities, and we leveraged
AutoML to find the optimal architectures. As one of the key optimizations, we
introduce Group Convolution based Inverted Bottleneck (IBN) blocks, which
provide great flexibility in trading off latency against accuracy.
The Inverted Bottleneck (IBN) is a widely used building block for mobile
vision networks. A conventional IBN uses pointwise convolutions for
expansion/projection before/after a depthwise convolution. It has previously
been shown that replacing the pointwise expansion and the depthwise convolution
with a single full convolution can provide more trainable parameters while
being faster. However, one big limitation is that such full-convolution IBNs
can get very expensive in terms of latency and memory requirements, especially
for the narrow/deep tensors seen in the later stages of vision models. This
limits the use of “fused” full-convolution IBNs throughout the model and leaves
the depthwise IBN as the only alternative.
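To make the contrast concrete, here is a minimal Keras sketch of the two
variants just described (an illustrative sketch only: the helper names, layer
parameters, and the omission of batch normalization are our simplifications,
not the released model code):

```
import tensorflow as tf

def conventional_ibn(x, out_filters, expand_ratio=4, stride=1):
  """(a) Depthwise IBN: pointwise expand -> depthwise -> pointwise project."""
  expanded = int(x.shape[-1]) * expand_ratio
  x = tf.keras.layers.Conv2D(expanded, 1, padding='same', activation='relu')(x)
  x = tf.keras.layers.DepthwiseConv2D(3, strides=stride, padding='same',
                                      activation='relu')(x)
  return tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)

def fused_ibn(x, out_filters, expand_ratio=4, stride=1):
  """(b) Fused IBN: one full 3x3 conv replaces the expand/depthwise pair."""
  expanded = int(x.shape[-1]) * expand_ratio
  x = tf.keras.layers.Conv2D(expanded, 3, strides=stride, padding='same',
                             activation='relu')(x)
  return tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)
```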
<figure align="center">
<img width=70% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-ibn-intro.png>
<figcaption>Inverted bottleneck block (IBN) variants: (a) Conventional with depthwise, (b) Fused-IBN, (c) GC-IBN with group convolutions in the expansion phase</figcaption>
</figure>
In this work we utilize Group Convolution (GC) as part of the fused expansion
when constructing IBNs (Figure 1). The GC-based IBN becomes a versatile block
that opens up a large design space between conventional depthwise IBNs and
fused full-convolution IBNs, controlled by the group size parameter. Figure 2
demonstrates the search space enabled by GC-based IBNs, which allows a flexible
tradeoff between latency and the number of trainable parameters. GC-based IBNs
allow increasing the number of trainable parameters gradually without paying
the latency cost of full-convolution based IBNs. Moreover, they can also be
faster than conventional IBNs with depthwise convolutions while providing more
trainable parameters.
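A matching sketch of the GC-IBN variant, using the `groups` argument of
`tf.keras.layers.Conv2D` for the grouped expansion (again illustrative; the
group count shown is an assumption, and the channel counts must divide evenly
by it):

```
def gc_ibn(x, out_filters, expand_ratio=4, groups=4, stride=1):
  """(c) GC-IBN: grouped 3x3 conv in the expansion, pointwise projection."""
  expanded = int(x.shape[-1]) * expand_ratio  # Must be divisible by `groups`.
  x = tf.keras.layers.Conv2D(expanded, 3, strides=stride, padding='same',
                             groups=groups, activation='relu')(x)
  return tf.keras.layers.Conv2D(out_filters, 1, padding='same')(x)
```

With `groups=1` this reduces to the fused IBN, while larger group counts move
the block toward the cheaper, depthwise end of the spectrum.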
<figure align="center">
<img width=60% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-gc-comparison.png>
</figure>
### Model performance on Edge TPU
The tradeoffs discussed above and exemplified in Figure 2 are highly dependent
on the tensor shapes and cannot be generalized across the whole network. Hence,
we rely on AutoML techniques to find the optimal block decisions and craft a
family of network architectures at different latency targets. Figure 3
demonstrates that the resulting MobilenetEdgeTPUV2 model family improves the
pareto-frontier compared to the existing on-device SOTA models when run on the
Edge TPU.
<figure align="center">
<img width=70% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-edgetpu-classification-plot.png>
<figcaption>Comparison of Imagenet top-1 accuracy and Pixel 6 Edge TPU latency of MobilenetEdgeTPUV2 models with other on-device classification models</figcaption>
</figure>
#### On-device benchmarking of classification models
Results of on-device benchmarking of various int8-quantized image
classification models at 224x224 input resolution:
Model | Accuracy (int8) | Pixel 6 Edge TPU Latency (ms)
----------------------- | :-------------- | :----------------------------
MobileNetEdgeTPUv2-Tiny | 74.70% | 0.78
MobileNetEdgeTPUv2-XS | 75.78% | 0.82
MobileNetEdgeTPUv2-S | 77.36% | 1.03
MobileNetEdgeTPUv2-M | 78.34% | 1.35
MobileNetEdgeTPUv2-L | 78.97% | 1.64
MobileNetEdgeTPU dm0.75 | 73.5% | 0.79
MobileNetEdgeTPU dm1.0 | 75.6% | 0.92
MobileNetEdgeTPU dm1.25 | 77.06% | 1.2
MobileNetEdgeTPU dm1.5 | 75.9% | 1.42
MobileNetEdgeTPU dm1.75 | 78.6% | 1.93
### Model performance on Pixel 6 CPU
Our primary optimization target is the Edge TPU accelerator however in our
search space we include operations that also run well on Pixel 6 CPU to be able
to reach a wide range of platforms. Moreover, we implement GC using functionally
equivalent series of commonly used ML primitives (channelwise slice, full
convolution, concatenation) as shown in Figure 2, since a native GC operation
may not be supported for all target platforms. As a result, the performance of
MobilenetEdgeTPUV2 is also superior to other on-device models when run on Pixel
6 CPU as shown in Figure 4.
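A sketch of that functionally equivalent decomposition (our own minimal
version, assuming the input channel count divides evenly by the group count):

```
def grouped_conv_via_primitives(x, filters, groups, kernel_size=3):
  """Grouped conv as channelwise slice -> per-group full conv -> concat."""
  slices = tf.split(x, num_or_size_splits=groups, axis=-1)
  outputs = [
      tf.keras.layers.Conv2D(filters // groups, kernel_size, padding='same')(s)
      for s in slices
  ]
  return tf.concat(outputs, axis=-1)
```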
<figure align="center">
<img width=70% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-cpu-classification-plot.png>
<figcaption>Comparison of Imagenet top-1 accuracy and Pixel 6 latency of MobilenetEdgeTPUV2 models with other on-device classification models</figcaption>
</figure>
## Semantic segmentation task
### Using classification models as backbone
We also present segmentation models based on the MobilenetEdgeTPUV2 backbone
with the DeepLab v3 plus decoder and head (first used
[here](https://arxiv.org/pdf/1802.02611.pdf)). These models are optimized for
the next generation Edge TPU accelerators featured in Pixel 6 phones and
improve the latency-accuracy pareto-frontier compared to their predecessors
based on MobileNetV2 and DeepLabV3+.
#### Segmentation model design
The segmentation model is built using the pretrained MobilenetEdgeTPUV2 as a
feature encoder and an ASPP decoder in conjunction with a DeepLab V3 Plus head.
Separable convolutions are used to reduce the size of the model.
<figure align="center">
<img width=60% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-seg-flow.png>
<figcaption></figcaption>
</figure>
The last two layers of the model (bilinear resizing and argmax) contribute
significantly to the on-device latency. This is due to the large activation
size between these layers (512 x 512 x number of classes). These layers can be
merged without significantly impacting quality scores by applying argmax at a
smaller resolution and scaling the resulting class map to the desired size
with nearest-neighbor upsampling.
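A minimal sketch of this fusion (a hypothetical, simplified version of the
`resize256,argmax,resize512,squeeze` export option described later; the sizes
are illustrative):

```
def fused_argmax(logits, argmax_size=256, output_size=512):
  """Argmax at a reduced resolution, then nearest-neighbor upsampling."""
  small = tf.image.resize(logits, [argmax_size, argmax_size])
  classes = tf.argmax(small, axis=-1, output_type=tf.int32)
  # Nearest-neighbor resizing preserves the integer class ids.
  upsampled = tf.image.resize(classes[..., tf.newaxis],
                              [output_size, output_size], method='nearest')
  return tf.squeeze(upsampled, axis=-1)
```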
<figure align="center">
<img width=60% src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-seg-fused-argmax.png>
</figure>
### On-device benchmarking of segmentation models
<figure align="center">
<img src=https://storage.cloud.google.com/tf_model_garden/models/edgetpu/images/readme-seg-plot.png width=60%>
<figcaption>Performance of AutosegEdgeTPU and MobilenetEdgeTPUV2+DeeplabV3+ models on the 32-class ADE20K semantic segmentation task.</figcaption>
</figure>
| Backbone                | Segmentation Head | #Parameters (million) | ADE20K 32-class mIOU | Pixel 6 Edge TPU latency (ms) |
| ----------------------- | ----------------- | --------------------- | -------------------- | ----------------------------- |
| MobileNet V2 (baseline) | DeeplabV3+        | 2.34                  | 54.06%               | 7.5                           |
| MobilenetEdgeTPUV2-XS   | DeeplabV3+        | 3.6                   | 56.02%               | 5.2                           |
| MobilenetEdgeTPUV2-S    | DeeplabV3+        | 5.2                   | 59.43%               | 5.9                           |
| MobilenetEdgeTPUV2-M    | DeeplabV3+        | 7.7                   | 59.81%               | 7.2                           |
| AutosegEdgeTPU-XS       | BiFPN             | 2.9                   | 59.64%               | 5.4                           |
| AutosegEdgeTPU-S        | BiFPN             | 3.1                   | 61.31%               | 5.7                           |
By fusing argmax with the resize operator as shown above, it is possible to
further improve the on-device latency of the segmentation models without
significantly impacting the quality:
| Backbone              | Segmentation Head | #Parameters (million) | ADE20K 32-class mIOU | Pixel 6 Edge TPU latency (ms) |
| --------------------- | ----------------- | --------------------- | -------------------- | ----------------------------- |
| MobilenetEdgeTPUV2-XS | DeeplabV3+        | 3.6                   | 56%                  | 3.4                           |
| MobilenetEdgeTPUV2-S  | DeeplabV3+        | 5.2                   | 59.41%               | 4.2                           |
| MobilenetEdgeTPUV2-M  | DeeplabV3+        | 7.7                   | 59.79%               | 5.5                           |
| AutosegEdgeTPU-XS     | BiFPN             | 2.9                   | 59.62%               | 3.6                           |
| AutosegEdgeTPU-S      | BiFPN             | 3.1                   | 61.28%               | 3.9                           |
### Training the models
Note that `EXPERIMENT_TYPE` has to be one of the preregistered classification
configs, such as `mobilenet_edgetpu_xs`, when training classification models.
When training a segmentation model, `EXPERIMENT_TYPE` has to be one of the
preregistered segmentation configs, such as
`seg_deeplabv3plus_mobilenet_edgetpuv2_s_ade20k` or `autoseg_edgetpu_xs`:
```
EXPERIMENT_NAME=xxx # Change this for your run, for example, 'mobilenet-edgetpu-test-run'
EXPERIMENT_TYPE=xxx # Change this for your run, for example, 'mobilenet_edgetpu_v2_xs'
$ python3 train.py \
--experiment_name=${EXPERIMENT_NAME} \
--experiment_type=${EXPERIMENT_TYPE} \
--mode=train_and_eval
```
### From training to quantized inference deployment
To export quantized tflite models using tensorflow post-training quantization:
**For classification models**:
```
$ python3 serving/export_tflite.py \
--model_name=${EXPERIMENT_TYPE} \
--ckpt_path=${CHECKPOINT} \
--dataset_dir=/path/to/calibration/dataset \
--output_dir=/tmp \
--quantize \
--image_size=224
```
Note that the `EXPERIMENT_TYPE` has to be in one of the preregistered
classification configs, such as `mobilenet_edgetpu_xs`.
**For segmentation models**:
```
$ python3 serving/export_tflite.py \
--model_name=${EXPERIMENT_TYPE} \
--ckpt_path=${CHECKPOINT} \
--dataset_dir=/path/to/calibration/dataset \
--output_dir=/tmp \
--quantize \
--quantize_less_restrictive \
--image_size=512 \
--finalize_method=${ARGMAX_FUSION}
```
`EXPERIMENT_TYPE` has to be one of the preregistered segmentation configs,
such as `deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32`.
`ARGMAX_FUSION` has to be one of the following:
- `resize512,argmax`: Argmax applied after scaling the output to 512x512.
- `resize256,argmax,resize512,squeeze`: Scale the output to 256x256, apply
argmax, then scale to 512x512 using nearest-neighbor upsampling.
- `resize128,argmax,resize512,squeeze`: Scale the output to 128x128, apply
argmax, then scale to 512x512 using nearest-neighbor upsampling.
### On-device benchmarking
The models in this repository are compatible with NNAPI and can be benchmarked
on Pixel 6 devices using the
[tflite benchmark tool](https://www.tensorflow.org/lite/performance/measurement).
While using the benchmark tool, enable NNAPI by setting the `use_nnapi`
command-line argument to `true` and specifying `nnapi_accelerator_name` as
`google-edgetpu`:
```
$ bazel build -c opt --config=android_arm64 tensorflow/lite/tools/benchmark:benchmark_model
# Push binary to device
$ adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp
# Push model to device
$ adb push /path/to/model.tflite /data/local/tmp/
# Run on-device benchmarking
$ adb shell /data/local/tmp/benchmark_model --graph=/data/local/tmp/model.tflite --use_nnapi=true --nnapi_accelerator_name=google-edgetpu
```
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
# type: ignore
"""Configuration definitions for MobilenetEdgeTPU losses, learning rates, optimizers, and training."""
import dataclasses
import os
from typing import Any, Mapping, Optional
# Import libraries
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import image_classification as base_config
@dataclasses.dataclass
class MobilenetEdgeTPUModelConfig(base_config.ImageClassificationModel):
"""Configuration for the MobilenetEdgeTPU model.
Attributes:
name: The name of the model. Defaults to 'MobilenetEdgeTPU'.
    model_params: A dictionary that represents the parameters of the
      MobilenetEdgeTPU model. These will be passed in to the "from_name"
      function.
"""
model_params: Mapping[str, Any] = dataclasses.field(
default_factory=lambda: { # pylint: disable=g-long-lambda
'model_name': 'mobilenet_edgetpu_v2_xs',
'model_weights_path': '',
'checkpoint_format': 'tf_checkpoint',
'overrides': {
'batch_norm': 'tpu',
'num_classes': 1001,
'rescale_input': False,
'dtype': 'bfloat16'
}
})
@dataclasses.dataclass
class MobilenetEdgeTPUTaskConfig(base_config.ImageClassificationTask):
"""Task defination for MobileNetEdgeTPU.
Attributes:
model: A `ModelConfig` instance.
saved_model_path: Instead of initializing a model from the model config,
the model can be loaded from a file path.
"""
model: MobilenetEdgeTPUModelConfig = MobilenetEdgeTPUModelConfig()
saved_model_path: Optional[str] = None
IMAGENET_TRAIN_EXAMPLES = 1281167
IMAGENET_VAL_EXAMPLES = 50000
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'
def mobilenet_edgetpu_base_experiment_config(
model_name: str) -> cfg.ExperimentConfig:
"""Image classification on imagenet with mobilenet_edgetpu.
Experiment config common across all mobilenet_edgetpu variants.
Args:
model_name: Name of the mobilenet_edgetpu model variant
Returns:
ExperimentConfig
"""
train_batch_size = 4096
eval_batch_size = 4096
steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
mobilenet_edgetpu_config = MobilenetEdgeTPUModelConfig(
num_classes=1001, input_size=[224, 224, 3])
mobilenet_edgetpu_config.model_params.model_name = model_name
config = cfg.ExperimentConfig(
task=MobilenetEdgeTPUTaskConfig(
model=mobilenet_edgetpu_config,
losses=base_config.Losses(label_smoothing=0.1),
train_data=base_config.DataConfig(
input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
is_training=True,
global_batch_size=train_batch_size,
dtype='bfloat16',
aug_type=common.Augmentation(type='autoaug')),
validation_data=base_config.DataConfig(
input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
is_training=False,
dtype='bfloat16',
drop_remainder=False,
global_batch_size=eval_batch_size)),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch * 5,
max_to_keep=10,
train_steps=550 * steps_per_epoch,
validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'rmsprop',
'rmsprop': {
'rho': 0.9,
'momentum': 0.9,
'epsilon': 0.001,
}
},
'ema': {
'average_decay': 0.99,
'trainable_weights_only': False,
},
'learning_rate': {
'type': 'exponential',
'exponential': {
'initial_learning_rate':
0.008 * (train_batch_size // 128),
'decay_steps':
int(2.4 * steps_per_epoch),
'decay_rate':
0.97,
'staircase':
True
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 5 * steps_per_epoch,
'warmup_learning_rate': 0
}
},
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
# Registration for MobileNet-EdgeTPU-Search models.
# When this config is used, users need to specify the saved model path via
# --params_override=task.saved_model_path='your/saved_model/path/'.
@exp_factory.register_config_factory('mobilenet_edgetpu_search')
def mobilenet_edgetpu_search() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_search')
# Registration for MobileNet-EdgeTPU-V2 models.
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_tiny')
def mobilenet_edgetpu_v2_tiny() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_tiny')
# Registration for MobileNet-EdgeTPU-V2 models.
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_xs')
def mobilenet_edgetpu_v2_xs() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_xs')
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_s')
def mobilenet_edgetpu_v2_s() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_s')
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_m')
def mobilenet_edgetpu_v2_m() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_m')
@exp_factory.register_config_factory('mobilenet_edgetpu_v2_l')
def mobilenet_edgetpu_v2_l() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_v2_l')
# Registration for MobileNet-EdgeTPU-V1 models.
@exp_factory.register_config_factory('mobilenet_edgetpu')
def mobilenet_edgetpu() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu')
# Registration for MobileNet-EdgeTPU-V1 models.
# We use 'depth_multiplier' to scale the models.
# E.g. dm1p25 implies depth multiplier of 1.25x
@exp_factory.register_config_factory('mobilenet_edgetpu_dm1p25')
def mobilenet_edgetpu_dm1p25() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_dm1p25')
@exp_factory.register_config_factory('mobilenet_edgetpu_dm1p5')
def mobilenet_edgetpu_dm1p5() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_dm1p5')
@exp_factory.register_config_factory('mobilenet_edgetpu_dm1p75')
def mobilenet_edgetpu_dm1p75() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('mobilenet_edgetpu_dm1p75')
# Registration for AutoSeg-EdgeTPU backbones
@exp_factory.register_config_factory('autoseg_edgetpu_backbone_xs')
def autoseg_edgetpu_backbone_xs() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('autoseg_edgetpu_backbone_xs')
@exp_factory.register_config_factory('autoseg_edgetpu_backbone_s')
def autoseg_edgetpu_backbone_s() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('autoseg_edgetpu_backbone_s')
@exp_factory.register_config_factory('autoseg_edgetpu_backbone_m')
def autoseg_edgetpu_backbone_m() -> cfg.ExperimentConfig:
return mobilenet_edgetpu_base_experiment_config('autoseg_edgetpu_backbone_m')
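# Usage sketch (added for illustration; not part of the original file):
# registered experiments can be retrieved by name via the experiment factory.
# The override keys below are hypothetical examples.
def _example_lookup_classification_config():
  """Sketch only: fetches a registered classification experiment config."""
  config = exp_factory.get_exp_config('mobilenet_edgetpu_v2_s')
  # Hypothetical override: shorten training for a smoke test.
  config.override({'trainer': {'train_steps': 100}}, is_strict=False)
  return config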
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Semantic segmentation configuration definition.
The segmentation model is built using the mobilenet edgetpu v2 backbone and
deeplab v3 segmentation head.
"""
import dataclasses
import os
from typing import Optional
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import semantic_segmentation as base_cfg
from official.vision.beta.configs.google import backbones
@dataclasses.dataclass
class MobileNetEdgeTPU(hyperparams.Config):
"""MobileNetEdgeTPU config."""
model_id: str = 'mobilenet_edgetpu_v2_s'
freeze_large_filters: Optional[int] = None
pretrained_checkpoint_path: Optional[str] = None
@dataclasses.dataclass
class Backbone(backbones.Backbone):
"""Configuration for backbones.
Attributes:
type: 'str', type of backbone be used, on the of fields below.
spinenet_seg: spinenet-seg backbone config.
"""
type: Optional[str] = None
mobilenet_edgetpu: MobileNetEdgeTPU = MobileNetEdgeTPU()
@dataclasses.dataclass
class CustomSemanticSegmentationTaskConfig(base_cfg.SemanticSegmentationTask):
"""Same config for custom taks."""
model: Optional[base_cfg.SemanticSegmentationModel] = None
train_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=True)
validation_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=False)
evaluation: base_cfg.Evaluation = base_cfg.Evaluation()
# ADE 20K Dataset
ADE20K_TRAIN_EXAMPLES = 20210
ADE20K_VAL_EXAMPLES = 2000
ADE20K_INPUT_PATH_BASE = 'gs://**/ADE20K'
PRETRAINED_CKPT_PATH_BASE = 'gs://**/placeholder_for_edgetpu_models'
BACKBONE_PRETRAINED_CHECKPOINT = {
'mobilenet_edgetpu_v2_l':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_l/ckpt-171600',
'mobilenet_edgetpu_v2_m':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_m/ckpt-171600',
'mobilenet_edgetpu_v2_s':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_s/ckpt-171600',
'mobilenet_edgetpu_v2_xs':
PRETRAINED_CKPT_PATH_BASE +
'/pretrained_checkpoints/mobilenet_edgetpu_v2_xs/ckpt-171600',
}
BACKBONE_HEADPOINT = {
'mobilenet_edgetpu_v2_l': 4,
'mobilenet_edgetpu_v2_m': 4,
'mobilenet_edgetpu_v2_s': 4,
'mobilenet_edgetpu_v2_xs': 4,
}
BACKBONE_LOWER_FEATURES = {
'mobilenet_edgetpu_v2_l': 3,
'mobilenet_edgetpu_v2_m': 3,
'mobilenet_edgetpu_v2_s': 3,
'mobilenet_edgetpu_v2_xs': 3,
}
def seg_deeplabv3plus_ade20k_32(backbone: str,
init_backbone: bool = True
) -> cfg.ExperimentConfig:
"""Semantic segmentation on ADE20K dataset with deeplabv3+."""
epochs = 200
train_batch_size = 128
eval_batch_size = 32
image_size = 512
steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
aspp_dilation_rates = [5, 10, 15]
pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
backbone] if init_backbone else None
config = cfg.ExperimentConfig(
task=CustomSemanticSegmentationTaskConfig(
model=base_cfg.SemanticSegmentationModel(
                # ADE20K uses only 32 semantic classes for training and
                # evaluation; the void (background) class is ignored in both.
num_classes=32,
input_size=[None, None, 3],
backbone=Backbone(
type='mobilenet_edgetpu',
mobilenet_edgetpu=MobileNetEdgeTPU(
model_id=backbone,
pretrained_checkpoint_path=pretrained_checkpoint_path,
freeze_large_filters=500,
)),
decoder=decoders.Decoder(
type='aspp',
aspp=decoders.ASPP(
level=BACKBONE_HEADPOINT[backbone],
use_depthwise_convolution=True,
dilation_rates=aspp_dilation_rates,
pool_kernel_size=[256, 256],
num_filters=128,
dropout_rate=0.3,
)),
head=base_cfg.SegmentationHead(
level=BACKBONE_HEADPOINT[backbone],
num_convs=2,
num_filters=256,
use_depthwise_convolution=True,
feature_fusion='deeplabv3plus',
low_level=BACKBONE_LOWER_FEATURES[backbone],
low_level_num_filters=48),
norm_activation=common.NormActivation(
activation='relu',
norm_momentum=0.99,
norm_epsilon=2e-3,
use_sync_bn=False)),
train_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
output_size=[image_size, image_size],
is_training=True,
global_batch_size=train_batch_size),
validation_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
output_size=[image_size, image_size],
is_training=False,
global_batch_size=eval_batch_size,
resize_eval_groundtruth=True,
drop_remainder=False),
evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
train_steps=epochs * steps_per_epoch,
validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'adam',
},
'learning_rate': {
'type': 'polynomial',
'polynomial': {
'initial_learning_rate': 0.0001,
'decay_steps': epochs * steps_per_epoch,
'end_learning_rate': 0.0,
'power': 0.9
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 4 * steps_per_epoch,
'warmup_learning_rate': 0
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
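# Note (added for illustration; not part of the original config): with
# `end_learning_rate=0.0`, the polynomial schedule above reduces to
#   lr(step) = 0.0001 * (1 - step / (epochs * steps_per_epoch)) ** 0.9
# applied after the 4-epoch linear warmup.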
def seg_deeplabv3plus_ade20k(backbone: str) -> cfg.ExperimentConfig:
  """Returns a 151-class ADE20K deeplabv3+ config for the given backbone."""
  config = seg_deeplabv3plus_ade20k_32(backbone)
config.task.model.num_classes = 151
config.trainer.optimizer_config.learning_rate.polynomial.power = 1.1
config.task.model.decoder.aspp.num_filters = 160
config.task.model.head.low_level_num_filters = 64
return config
# Experiment configs for 32 output classes
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32')
def deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32() -> cfg.ExperimentConfig:
return seg_deeplabv3plus_ade20k_32('mobilenet_edgetpu_v2_m')
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32')
def deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32() -> cfg.ExperimentConfig:
return seg_deeplabv3plus_ade20k_32('mobilenet_edgetpu_v2_s')
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32')
def deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32() -> cfg.ExperimentConfig:
return seg_deeplabv3plus_ade20k_32('mobilenet_edgetpu_v2_xs')
# Experiment configs for 151 output classes
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_m_ade20k')
def deeplabv3plus_mobilenet_edgetpuv2_m_ade20k() -> cfg.ExperimentConfig:
config = seg_deeplabv3plus_ade20k('mobilenet_edgetpu_v2_m')
return config
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k')
def deeplabv3plus_mobilenet_edgetpuv2_s_ade20k() -> cfg.ExperimentConfig:
config = seg_deeplabv3plus_ade20k('mobilenet_edgetpu_v2_s')
return config
@exp_factory.register_config_factory(
'deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k')
def deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k() -> cfg.ExperimentConfig:
config = seg_deeplabv3plus_ade20k('mobilenet_edgetpu_v2_xs')
return config
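# Usage sketch (added for illustration; not part of the original file): the
# registered ADE20K experiments can be fetched and overridden by name. The
# override key below is a hypothetical example.
def _example_lookup_ade20k_config():
  """Sketch only: fetches a registered ADE20K segmentation experiment."""
  config = exp_factory.get_exp_config(
      'deeplabv3plus_mobilenet_edgetpuv2_s_ade20k')
  # Hypothetical override: evaluate with a smaller batch size.
  config.override({'task': {'validation_data': {'global_batch_size': 16}}},
                  is_strict=False)
  return config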
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
# type: ignore
"""Semantic segmentation configuration definition for AutoML built models."""
import dataclasses
import os
from typing import Any, List, Optional, Mapping
# Import libraries
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import semantic_segmentation as base_cfg
from official.vision.beta.configs.google import backbones
# ADE 20K Dataset
ADE20K_TRAIN_EXAMPLES = 20210
ADE20K_VAL_EXAMPLES = 2000
ADE20K_INPUT_PATH_BASE = 'gs://**/ADE20K'
PRETRAINED_CKPT_PATH_BASE = 'gs://**/placeholder_for_edgetpu_models/pretrained_checkpoints'
BACKBONE_PRETRAINED_CHECKPOINT = {
'autoseg_edgetpu_backbone_xs':
PRETRAINED_CKPT_PATH_BASE +
'/autoseg_edgetpu_backbone_xs/ckpt-171600',
'autoseg_edgetpu_backbone_s':
PRETRAINED_CKPT_PATH_BASE +
'/autoseg_edgetpu_backbone_s/ckpt-171600',
'autoseg_edgetpu_backbone_m':
PRETRAINED_CKPT_PATH_BASE +
'/autoseg_edgetpu_backbone_m/ckpt-171600',
}
@dataclasses.dataclass
class BiFPNHeadConfig(hyperparams.Config):
"""BiFPN-based segmentation head config."""
min_level: int = 3
max_level: int = 8
fpn_num_filters: int = 96
@dataclasses.dataclass
class Losses(hyperparams.Config):
label_smoothing: float = 0.0
ignore_label: int = 255
class_weights: List[float] = dataclasses.field(default_factory=list)
l2_weight_decay: float = 0.0
use_groundtruth_dimension: bool = True
top_k_percent_pixels: float = 1.0
@dataclasses.dataclass
class AutosegEdgeTPUModelConfig(hyperparams.Config):
"""Autoseg-EdgeTPU segmentation model config."""
num_classes: int = 0
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone()
head: BiFPNHeadConfig = BiFPNHeadConfig()
model_params: Mapping[str, Any] = dataclasses.field(
default_factory=lambda: { # pylint: disable=g-long-lambda
'model_name': 'autoseg_edgetpu_backbone_s',
'checkpoint_format': 'tf_checkpoint',
'overrides': {
'batch_norm': 'tpu',
'rescale_input': False,
'backbone_only': True,
'resolution': 512
}
})
@dataclasses.dataclass
class AutosegEdgeTPUTaskConfig(base_cfg.SemanticSegmentationTask):
"""The task config inherited from the base segmentation task."""
model: AutosegEdgeTPUModelConfig = AutosegEdgeTPUModelConfig()
train_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=True)
validation_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=False)
losses: Losses = Losses()
init_checkpoint: Optional[str] = None
init_checkpoint_modules: str = 'backbone' # all or backbone
model_output_keys: Optional[List[int]] = dataclasses.field(
default_factory=list)
def autoseg_edgetpu_experiment_config(backbone_name: str,
init_backbone: bool = True
) -> cfg.ExperimentConfig:
"""Experiment using the semantic segmenatation searched model.
Args:
backbone_name: Name of the backbone used for this model
init_backbone: Whether to initialize backbone from a pretrained checkpoint
Returns:
ExperimentConfig
"""
epochs = 300
train_batch_size = 64
eval_batch_size = 32
image_size = 512
steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
train_steps = epochs * steps_per_epoch
model_config = AutosegEdgeTPUModelConfig(
num_classes=32, input_size=[image_size, image_size, 3])
model_config.model_params.model_name = backbone_name
if init_backbone:
model_config.model_params.model_weights_path = (
BACKBONE_PRETRAINED_CHECKPOINT[backbone_name])
model_config.model_params.overrides.resolution = image_size
config = cfg.ExperimentConfig(
task=AutosegEdgeTPUTaskConfig(
model=model_config,
train_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
output_size=[image_size, image_size],
is_training=True,
global_batch_size=train_batch_size,
aug_scale_min=0.5,
aug_scale_max=2.0),
validation_data=base_cfg.DataConfig(
input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
output_size=[image_size, image_size],
is_training=False,
resize_eval_groundtruth=True,
drop_remainder=True,
global_batch_size=eval_batch_size),
evaluation=base_cfg.Evaluation(report_train_mean_iou=False)),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch * 5,
max_to_keep=10,
train_steps=train_steps,
validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'sgd',
'sgd': {
'nesterov': True,
'momentum': 0.9,
}
},
'ema': {
'average_decay': 0.9998,
'trainable_weights_only': False,
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.12,
'decay_steps': train_steps
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 5 * steps_per_epoch,
'warmup_learning_rate': 0
}
},
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
# Registration for searched segmentation models.
@exp_factory.register_config_factory('autoseg_edgetpu_xs')
def autoseg_edgetpu_xs() -> cfg.ExperimentConfig:
return autoseg_edgetpu_experiment_config('autoseg_edgetpu_backbone_xs')
@exp_factory.register_config_factory('autoseg_edgetpu_s')
def autoseg_edgetpu_s() -> cfg.ExperimentConfig:
return autoseg_edgetpu_experiment_config('autoseg_edgetpu_backbone_s')
@exp_factory.register_config_factory('autoseg_edgetpu_m')
def autoseg_edgetpu_m() -> cfg.ExperimentConfig:
return autoseg_edgetpu_experiment_config('autoseg_edgetpu_backbone_m')
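# Usage sketch (added for illustration; not part of the original file):
# `model_params` overrides, such as the input resolution, can be applied after
# fetching a registered config. The resolution value here is hypothetical.
def _example_lookup_autoseg_config():
  """Sketch only: fetches and adjusts a searched segmentation config."""
  config = exp_factory.get_exp_config('autoseg_edgetpu_s')
  config.task.model.model_params.overrides.resolution = 384
  return config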
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classification decoder and parser."""
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.ops import preprocess_ops
MEAN_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
STDDEV_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
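# Note (added comment): with mean and stddev both equal to 127.5, the
# normalization below maps uint8 pixel values from [0, 255] to roughly
# [-1, 1].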
def random_crop_image(image,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.05, 1.0),
max_attempts=100):
"""Randomly crop an arbitrary shaped slice from the input image.
Args:
image: a Tensor of shape [height, width, 3] representing the input image.
aspect_ratio_range: a list of floats. The cropped area of the image must
have an aspect ratio = width / height within this range.
area_range: a list of floats. The cropped reas of the image must contain
a fraction of the input image within this range.
max_attempts: the number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return
the entire image.
Returns:
cropped_image: a Tensor representing the random cropped image. Can be the
original image if max_attempts is exhausted.
"""
with tf.name_scope('random_crop_image'):
crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
tf.shape(image),
tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
min_object_covered=0.1,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
cropped_image = tf.slice(image, crop_offset, crop_size)
return cropped_image
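# Minimal usage sketch (added; names and values are illustrative only):
def _example_random_crop():
  """Sketch only: applies `random_crop_image` to a synthetic image."""
  image = tf.zeros([224, 224, 3], dtype=tf.uint8)
  # Keep at least half of the image area in the crop.
  return random_crop_image(image, area_range=(0.5, 1.0))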
def random_crop_image_v2(image_bytes,
image_shape,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.05, 1.0),
max_attempts=100):
"""Randomly crop an arbitrary shaped slice from the input image.
This is a faster version of `random_crop_image` which takes the original
image bytes and image size as the inputs, and partially decode the JPEG
bytes according to the generated crop.
Args:
image_bytes: a Tensor of type string representing the raw image bytes.
image_shape: a Tensor specifying the shape of the raw image.
aspect_ratio_range: a list of floats. The cropped area of the image must
have an aspect ratio = width / height within this range.
area_range: a list of floats. The cropped reas of the image must contain
a fraction of the input image within this range.
max_attempts: the number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return
the entire image.
Returns:
cropped_image: a Tensor representing the random cropped image. Can be the
original image if max_attempts is exhausted.
"""
with tf.name_scope('random_crop_image_v2'):
crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
image_shape,
tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
min_object_covered=0.1,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
offset_y, offset_x, _ = tf.unstack(crop_offset)
crop_height, crop_width, _ = tf.unstack(crop_size)
crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width])
cropped_image = tf.image.decode_and_crop_jpeg(
image_bytes, crop_window, channels=3)
return cropped_image
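# Minimal usage sketch (added; illustrative only): exercises the fast
# decode-and-crop path on synthetic JPEG bytes.
def _example_random_crop_v2():
  """Sketch only: applies `random_crop_image_v2` to encoded bytes."""
  image_bytes = tf.io.encode_jpeg(tf.zeros([224, 224, 3], dtype=tf.uint8))
  image_shape = tf.image.extract_jpeg_shape(image_bytes)
  return random_crop_image_v2(image_bytes, image_shape)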
class Decoder(classification_input.Decoder):
"""A tf.Example decoder for classification task."""
pass
class Parser(classification_input.Parser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def _parse_train_image(self, decoded_tensors):
"""Parses image data for training."""
image_bytes = decoded_tensors[self._image_field_key]
if self._decode_jpeg_only:
image_shape = tf.image.extract_jpeg_shape(image_bytes)
# Crops image.
cropped_image = random_crop_image_v2(
image_bytes, image_shape)
image = tf.cond(
tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
lambda: cropped_image)
else:
# Decodes image.
image = tf.io.decode_image(image_bytes, channels=3)
image.set_shape([None, None, 3])
# Crops image.
cropped_image = random_crop_image(image)
image = tf.cond(
tf.reduce_all(tf.equal(tf.shape(cropped_image), tf.shape(image))),
lambda: preprocess_ops.center_crop_image(image),
lambda: cropped_image)
if self._aug_rand_hflip:
image = tf.image.random_flip_left_right(image)
# Resizes image.
image = tf.image.resize(
image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Applies autoaug or randaug.
if self._augmenter is not None:
image = self._augmenter.distort(image)
# Normalizes image with mean and std pixel values.
image = preprocess_ops.normalize_image(image,
offset=MEAN_RGB,
scale=STDDEV_RGB)
    # Converts image to self._dtype.
image = tf.image.convert_image_dtype(image, self._dtype)
return image
def _parse_eval_image(self, decoded_tensors):
"""Parses image data for evaluation."""
image_bytes = decoded_tensors[self._image_field_key]
if self._decode_jpeg_only:
image_shape = tf.image.extract_jpeg_shape(image_bytes)
# Center crops.
image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)
else:
# Decodes image.
image = tf.io.decode_image(image_bytes, channels=3)
image.set_shape([None, None, 3])
# Center crops.
image = preprocess_ops.center_crop_image(image)
image = tf.image.resize(
image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
# Normalizes image with mean and std pixel values.
image = preprocess_ops.normalize_image(image,
offset=MEAN_RGB,
scale=STDDEV_RGB)
    # Converts image to self._dtype.
image = tf.image.convert_image_dtype(image, self._dtype)
return image
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests classification_input.py."""
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.dataloaders import classification_input
from official.vision.beta.configs import common
from official.vision.beta.dataloaders import tfexample_utils
IMAGE_FIELD_KEY = 'image/encoded'
LABEL_FIELD_KEY = 'image/class/label'
class DecoderTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
(100, 100, 0),
(100, 100, 1),
(100, 100, 2),
)
def test_decoder(self, image_height, image_width, num_instances):
decoder = classification_input.Decoder(
image_field_key=IMAGE_FIELD_KEY, label_field_key=LABEL_FIELD_KEY)
serialized_example = tfexample_utils.create_classification_example(
image_height, image_width)
decoded_tensors = decoder.decode(tf.convert_to_tensor(serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertCountEqual([IMAGE_FIELD_KEY, LABEL_FIELD_KEY], results.keys())
self.assertEqual(0, results[LABEL_FIELD_KEY])
class ParserTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
([224, 224, 3], 'float32', True, 'autoaug', False, True, 'JPEG'),
([224, 224, 3], 'float16', True, 'randaug', False, False, 'PNG'),
([224, 224, 3], 'float32', False, None, False, True, 'JPEG'),
([224, 224, 3], 'float16', False, None, False, False, 'PNG'),
([512, 640, 3], 'float32', True, 'randaug', False, False, 'JPEG'),
([512, 640, 3], 'float16', True, 'autoaug', False, False, 'PNG'),
([512, 640, 3], 'float32', False, None, False, True, 'JPEG'),
([512, 640, 3], 'float16', False, None, False, False, 'PNG'),
([640, 640, 3], 'float32', True, None, False, False, 'JPEG'),
([640, 640, 3], 'bfloat16', True, None, False, False, 'PNG'),
([640, 640, 3], 'float32', False, None, False, False, 'JPEG'),
([640, 640, 3], 'bfloat16', False, None, False, False, 'PNG'),
([224, 224, 3], 'float32', True, 'autoaug', True, True, 'JPEG'),
([224, 224, 3], 'float16', True, 'randaug', True, False, 'PNG'),
)
def test_parser(self, output_size, dtype, is_training, aug_name,
is_multilabel, decode_jpeg_only, image_format):
serialized_example = tfexample_utils.create_classification_example(
output_size[0], output_size[1], image_format, is_multilabel)
if aug_name == 'randaug':
aug_type = common.Augmentation(
type=aug_name, randaug=common.RandAugment(magnitude=10))
elif aug_name == 'autoaug':
aug_type = common.Augmentation(
type=aug_name, autoaug=common.AutoAugment(augmentation_name='test'))
else:
aug_type = None
decoder = classification_input.Decoder(
image_field_key=IMAGE_FIELD_KEY, label_field_key=LABEL_FIELD_KEY,
is_multilabel=is_multilabel)
parser = classification_input.Parser(
output_size=output_size[:2],
num_classes=10,
image_field_key=IMAGE_FIELD_KEY,
label_field_key=LABEL_FIELD_KEY,
is_multilabel=is_multilabel,
decode_jpeg_only=decode_jpeg_only,
aug_rand_hflip=False,
aug_type=aug_type,
dtype=dtype)
decoded_tensors = decoder.decode(serialized_example)
image, label = parser.parse_fn(is_training)(decoded_tensors)
self.assertAllEqual(image.numpy().shape, output_size)
if not is_multilabel:
self.assertAllEqual(label, 0)
else:
self.assertAllEqual(label.numpy().shape, [10])
if dtype == 'float32':
self.assertAllEqual(image.dtype, tf.float32)
elif dtype == 'float16':
self.assertAllEqual(image.dtype, tf.float16)
elif dtype == 'bfloat16':
self.assertAllEqual(image.dtype, tf.bfloat16)
if __name__ == '__main__':
tf.test.main()