"...resnet50_tensorflow.git" did not exist on "c42ce18ce500893f2aa0bf08828a9d0f8083fbae"
Unverified commit 7479dbb8 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

parents 8b60a5a8 9c8cbd0c
......@@ -61,7 +61,7 @@ class CosineLearningRateWithLinearWarmup(
"""Class to generate learning rate tensor."""
def __init__(self, total_steps, params):
"""Creates the consine learning rate tensor with linear warmup."""
"""Creates the cosine learning rate tensor with linear warmup."""
super(CosineLearningRateWithLinearWarmup, self).__init__()
self._total_steps = total_steps
assert isinstance(params, (dict, params_dict.ParamsDict))
......
......@@ -78,7 +78,7 @@ class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor."""
def __init__(self, batch_size: int, total_steps: int, warmup_steps: int):
"""Creates the consine learning rate tensor with linear warmup.
"""Creates the cosine learning rate tensor with linear warmup.
Args:
batch_size: The training batch size used in the experiment.
......
......@@ -216,14 +216,14 @@ class StepCosineLrConfig(base_config.Config):
"""Configuration for stepwise learning rate decay.
This class is a container for the piecewise cosine learning rate scheduling
configs. It will configure an instance of StepConsineDecayWithOffset keras
configs. It will configure an instance of StepCosineDecayWithOffset keras
learning rate schedule.
```python
boundaries: [100000, 110000]
values: [1.0, 0.5]
lr_decayed_fn = (
lr_schedule.StepConsineDecayWithOffset(
lr_schedule.StepCosineDecayWithOffset(
boundaries,
values))
```
......@@ -243,7 +243,7 @@ class StepCosineLrConfig(base_config.Config):
[boundaries[n], end] -> values[n+1] to 0.
offset: An int. The offset applied to steps. Defaults to 0.
"""
name: str = 'StepConsineDecayWithOffset'
name: str = 'StepCosineDecayWithOffset'
boundaries: Optional[List[int]] = None
values: Optional[List[float]] = None
offset: int = 0
......
......@@ -386,11 +386,11 @@ class PowerDecayWithOffset(tf.keras.optimizers.schedules.LearningRateSchedule):
}
class StepConsineDecayWithOffset(
class StepCosineDecayWithOffset(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Stepwise cosine learning rate decay with offset.
Learning rate is equivalent to one or more consine decay(s) starting and
Learning rate is equivalent to one or more cosine decay(s) starting and
ending at each interval.
Example:
......@@ -399,7 +399,7 @@ class StepConsineDecayWithOffset(
boundaries: [100000, 110000]
values: [1.0, 0.5]
lr_decayed_fn = (
lr_schedule.StepConsineDecayWithOffset(
lr_schedule.StepCosineDecayWithOffset(
boundaries,
values))
```
......@@ -412,7 +412,7 @@ class StepConsineDecayWithOffset(
boundaries,
values,
offset: int = 0,
name: str = "StepConsineDecayWithOffset"):
name: str = "StepCosineDecayWithOffset"):
"""Initialize configuration of the learning rate schedule.
Args:
......@@ -444,7 +444,7 @@ class StepConsineDecayWithOffset(
] + [0])
def __call__(self, global_step):
with tf.name_scope(self.name or "StepConsineDecayWithOffset"):
with tf.name_scope(self.name or "StepCosineDecayWithOffset"):
global_step = tf.cast(global_step - self.offset, tf.float32)
lr_levels = self.values
lr_steps = self.boundaries
......
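For intuition, here is a minimal, self-contained sketch of the schedule the docstrings above describe. It follows one plausible reading of the boundaries/values example (each interval runs its own cosine decay toward the next value, and the final interval decays toward 0); the actual StepCosineDecayWithOffset implementation may differ in edge cases.
```python
import math

def step_cosine_lr(step, boundaries, values, total_steps, offset=0):
  """Illustrative stepwise cosine decay (assumed semantics, not library code)."""
  step = max(step - offset, 0)
  if step >= total_steps:
    return 0.0
  starts = list(boundaries)
  ends = list(boundaries[1:]) + [total_steps]
  targets = list(values[1:]) + [0.0]
  lr = values[0]  # Hold the first value until the first boundary is reached.
  for start, end, value, target in zip(starts, ends, values, targets):
    if start <= step < end:
      progress = (step - start) / float(end - start)
      cosine = 0.5 * (1.0 + math.cos(math.pi * progress))
      lr = target + (value - target) * cosine
  return lr

# With the docstring's example, boundaries=[100000, 110000] and values=[1.0, 0.5]:
# the rate is 1.0 at step 100000, reaches 0.5 by step 110000, and the final
# interval then decays from 0.5 toward 0 by total_steps.
print(step_cosine_lr(105000, [100000, 110000], [1.0, 0.5], total_steps=120000))  # 0.75
```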
......@@ -47,7 +47,7 @@ LR_CLS = {
'power': lr_schedule.DirectPowerDecay,
'power_linear': lr_schedule.PowerAndLinearDecay,
'power_with_offset': lr_schedule.PowerDecayWithOffset,
'step_cosine_with_offset': lr_schedule.StepConsineDecayWithOffset,
'step_cosine_with_offset': lr_schedule.StepCosineDecayWithOffset,
}
WARMUP_CLS = {
......
......@@ -33,7 +33,6 @@ from official.nlp.tools import tokenization
FLAGS = flags.FLAGS
# TODO(chendouble): consider moving each task to its own binary.
flags.DEFINE_enum(
"fine_tuning_task_type", "classification",
["classification", "regression", "squad", "retrieval", "tagging"],
......
......@@ -1086,12 +1086,17 @@ class Encoder(Module):
self.output_dropout = Dropout(self.config.dropout_rate,)
@tf.Module.with_name_scope
def __call__(self, inputs, encoder_mask=None, training=False):
def __call__(self,
inputs,
encoder_mask=None,
dense_inputs=None,
training=False):
"""Applies Transformer model on the inputs.
Args:
inputs: input data
encoder_mask: the encoder self-attention mask.
dense_inputs: dense input data, concat after the embedding.
training: whether it is training pass, affecting dropouts.
Returns:
......@@ -1102,11 +1107,20 @@ class Encoder(Module):
encoder_mask = tf.cast(encoder_mask, self.compute_dtype)
cfg = self.config
x = self.input_embed(inputs, one_hot=cfg.one_hot_embedding)
if dense_inputs is not None:
x = tf.concat([x, dense_inputs], axis=1)
tensor_shape = tf_utils.get_shape_list(x)
tensor_shape[-2] = 1
x = self.input_dropout(x, noise_shape=tensor_shape, training=training)
input_length = tf_utils.get_shape_list(inputs)[1]
position_bias = self.relative_embedding(input_length, input_length)
if dense_inputs is not None:
# Here we ignore relative position bias for dense embeddings.
dense_input_length = tf_utils.get_shape_list(dense_inputs)[1]
# Position bias shape: [batch, 1, len, len]
paddings = tf.constant([[0, 0], [0, 0], [0, dense_input_length],
[0, dense_input_length]])
position_bias = tf.pad(position_bias, paddings, "CONSTANT")
for i in range(cfg.num_layers):
x = self.encoder_layers[i](
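A tiny standalone illustration of the position-bias padding step in this hunk (shapes are invented for the example): the relative position bias covers only the token positions, so the dense positions appended via tf.concat receive zero bias along both the query and key axes.
```python
import tensorflow as tf

position_bias = tf.random.normal([2, 1, 8, 8])  # [batch, 1, len, len] for 8 tokens
dense_input_length = 2                          # two dense vectors appended
paddings = tf.constant([[0, 0], [0, 0], [0, dense_input_length],
                        [0, dense_input_length]])
padded_bias = tf.pad(position_bias, paddings, "CONSTANT")  # zeros for dense slots
print(padded_bias.shape)  # (2, 1, 10, 10)
```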
......@@ -1308,31 +1322,56 @@ class T5Transformer(Module):
def encode(self,
encoder_input_tokens,
encoder_segment_ids=None,
encoder_dense_inputs=None,
encoder_dense_segment_ids=None,
training=False):
eligible_positions = tf.cast(
tf.not_equal(encoder_input_tokens, 0), self.compute_dtype)
if encoder_dense_inputs is not None:
eligible_dense_position = tf.cast(
tf.reduce_any(tf.not_equal(encoder_dense_inputs, 0), axis=-1),
self.compute_dtype)
eligible_positions = tf.concat(
[eligible_positions, eligible_dense_position], axis=1)
encoder_mask = make_attention_mask(
eligible_positions, eligible_positions, dtype=tf.bool)
if encoder_segment_ids is not None:
if encoder_dense_segment_ids is not None:
encoder_segment_ids = tf.concat(
[encoder_segment_ids, encoder_dense_segment_ids], axis=1)
segment_mask = make_attention_mask(
encoder_segment_ids, encoder_segment_ids, tf.equal, dtype=tf.bool)
encoder_mask = tf.math.logical_and(encoder_mask, segment_mask)
encoder_mask = (1.0 - tf.cast(encoder_mask, self.compute_dtype)) * -1e9
return self.encoder(encoder_input_tokens, encoder_mask, training=training)
return self.encoder(
encoder_input_tokens,
encoder_mask,
encoder_dense_inputs,
training=training)
def decode(
self,
encoded,
decoder_target_tokens,
encoder_input_tokens, # only used for masks
encoder_dense_inputs=None,
decoder_input_tokens=None,
encoder_segment_ids=None,
encoder_dense_segment_ids=None,
decoder_segment_ids=None,
decode_position=None,
cache=None,
max_decode_len=None,
decode=False,
training=False):
eligible_inputs = tf.cast(
tf.not_equal(encoder_input_tokens, 0), self.compute_dtype)
if encoder_dense_inputs is not None:
eligible_dense_inputs = tf.cast(
tf.reduce_any(tf.not_equal(encoder_dense_inputs, 0), axis=-1),
self.compute_dtype)
eligible_inputs = tf.concat([eligible_inputs, eligible_dense_inputs],
axis=1)
if decode:
# For decoding, the decoder_input_tokens is the decoder_target_tokens.
decoder_input_tokens = decoder_target_tokens
......@@ -1342,14 +1381,12 @@ class T5Transformer(Module):
tf.cast(
tf.not_equal(tf.ones_like(decoder_target_tokens), 0),
self.compute_dtype),
tf.cast(tf.not_equal(encoder_input_tokens, 0), self.compute_dtype),
eligible_inputs,
dtype=tf.bool)
else:
# Note that, masks should be created using decoder_target_tokens.
eligible_targets = tf.cast(
tf.not_equal(decoder_target_tokens, 0), self.compute_dtype)
eligible_inputs = tf.cast(
tf.not_equal(encoder_input_tokens, 0), self.compute_dtype)
decoder_mask = tf.math.logical_and(
make_attention_mask(
eligible_targets, eligible_targets, dtype=tf.bool),
......@@ -1365,6 +1402,9 @@ class T5Transformer(Module):
decoder_segment_ids,
tf.equal,
dtype=tf.bool))
if encoder_dense_segment_ids is not None:
encoder_segment_ids = tf.concat(
[encoder_segment_ids, encoder_dense_segment_ids], axis=1)
encoder_decoder_mask = tf.math.logical_and(
encoder_decoder_mask,
make_attention_mask(
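For clarity, a small standalone sketch of the eligibility logic used in encode and decode above (toy tensors, not the model code): a token position is eligible when its id is non-zero, and a dense position is eligible when any element of its vector is non-zero, so both kinds of positions can share one attention mask.
```python
import tensorflow as tf

encoder_input_tokens = tf.constant([[5, 3, 0, 0]])              # [batch, len]
encoder_dense_inputs = tf.constant([[[0.1, 0.0], [0.0, 0.0]]])  # [batch, dense_len, dim]

eligible_tokens = tf.cast(tf.not_equal(encoder_input_tokens, 0), tf.float32)
eligible_dense = tf.cast(
    tf.reduce_any(tf.not_equal(encoder_dense_inputs, 0), axis=-1), tf.float32)
eligible_positions = tf.concat([eligible_tokens, eligible_dense], axis=1)
print(eligible_positions.numpy())  # [[1. 1. 0. 0. 1. 0.]]
```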
......@@ -1392,6 +1432,8 @@ class T5Transformer(Module):
def __call__(self,
encoder_input_tokens,
decoder_target_tokens,
encoder_dense_inputs=None,
encoder_dense_segment_ids=None,
decoder_input_tokens=None,
encoder_segment_ids=None,
decoder_segment_ids=None,
......@@ -1401,9 +1443,12 @@ class T5Transformer(Module):
Args:
encoder_input_tokens: input tokens to the encoder.
decoder_target_tokens: target tokens to the decoder.
encoder_dense_inputs: input dense vectors to the encoder.
encoder_dense_segment_ids: dense input segmentation info for packed
examples.
decoder_input_tokens: input tokens to the decoder, only required for
training.
encoder_segment_ids: input segmentation info for packed examples.
decoder_segment_ids: target segmentation info for packed examples.
training: whether it is training pass, affecting dropouts.
......@@ -1413,13 +1458,17 @@ class T5Transformer(Module):
encoded = self.encode(
encoder_input_tokens,
encoder_segment_ids=encoder_segment_ids,
encoder_dense_inputs=encoder_dense_inputs,
encoder_dense_segment_ids=encoder_dense_segment_ids,
training=training)
outputs = self.decode(
encoded=encoded,
decoder_target_tokens=decoder_target_tokens,
encoder_input_tokens=encoder_input_tokens, # only used for masks.
encoder_dense_inputs=encoder_dense_inputs, # only used for masks.
decoder_input_tokens=decoder_input_tokens,
encoder_segment_ids=encoder_segment_ids,
encoder_dense_segment_ids=encoder_dense_segment_ids,
decoder_segment_ids=decoder_segment_ids,
training=training)
outputs["encoded"] = encoded
......
......@@ -354,6 +354,24 @@ class T5Test(tf.test.TestCase, parameterized.TestCase):
encoded = encoder(tf.zeros((4, 8), dtype=tf.int32))
self.assertEqual(encoded.shape, (4, 8, config.d_model))
@parameterized.named_parameters(("bfloat16", tf.bfloat16),
("float32", tf.float32))
def test_encoder_with_dense(self, dtype):
config = t5.T5TransformerParams(
num_layers=2,
d_model=4,
d_kv=3,
num_heads=4,
d_ff=16,
vocab_size=10,
vocab_embeddings_initializer=tf.keras.initializers.Ones(),
relative_embeddings_initializer=tf.keras.initializers.Ones())
encoder = t5.Encoder(config, compute_dtype=dtype)
encoded = encoder(
tf.zeros((4, 8), dtype=tf.int32),
dense_inputs=tf.ones((4, 2, 4), dtype=dtype))
self.assertEqual(encoded.shape, (4, 10, config.d_model))
def test_decoder(self):
max_decode_len = 10
config = t5.T5TransformerParams(
......@@ -445,6 +463,58 @@ class T5Test(tf.test.TestCase, parameterized.TestCase):
print(v.name, v.shape)
self.assertEqual(v.dtype, tf.float32)
@parameterized.named_parameters(
("t5_10", ("relu",), True, 26, False, tf.float32),)
def test_transformer_with_dense(self, ffn_activations, logits_via_embedding,
expect_num_variables, layer_sharing, dtype):
max_decode_len = 10
config = t5.T5TransformerParams(
num_layers=1,
d_model=8,
d_kv=4,
num_heads=4,
d_ff=32,
vocab_size=10,
shared_embedding=True,
layer_sharing=layer_sharing,
ffn_activations=ffn_activations,
logits_via_embedding=logits_via_embedding)
transformer = t5.T5Transformer(config, compute_dtype=dtype)
self.assertLen(transformer.trainable_variables, expect_num_variables)
inputs = tf.convert_to_tensor(
np.array([[2, 2, 1, 3, 1, 0], [3, 3, 1, 2, 2, 1]]))
segments = tf.convert_to_tensor(
np.array([[1, 1, 1, 2, 2, 0], [1, 1, 1, 2, 2, 2]]))
dense_inputs = tf.convert_to_tensor(np.random.randn(2, 2, 8), dtype=dtype)
dense_segments = tf.convert_to_tensor(np.array([[1, 2], [1, 2]]))
outputs = transformer(
encoder_input_tokens=inputs,
encoder_dense_inputs=dense_inputs,
decoder_input_tokens=inputs,
decoder_target_tokens=inputs,
encoder_segment_ids=segments,
encoder_dense_segment_ids=dense_segments,
decoder_segment_ids=segments)
cache = {}
batch_size = 2
cache[0] = _create_cache(
batch_size, max_decode_len, config.num_heads, config.d_kv, dtype=dtype)
outputs = transformer.decode(
encoder_input_tokens=inputs,
encoder_dense_inputs=dense_inputs,
encoded=outputs["encoded"],
decoder_target_tokens=tf.ones((batch_size, 1), dtype=tf.int32),
decode_position=1,
decode=True,
max_decode_len=max_decode_len,
cache=cache)
self.assertEqual(outputs["logits"].shape,
(batch_size, 1, config.vocab_size))
for v in transformer.trainable_variables:
print(v.name, v.shape)
self.assertEqual(v.dtype, tf.float32)
@parameterized.named_parameters(
("t5_10", ("relu",), True, 39, tf.float32, 2),
("t5_10_bfloat16", ("relu",), True, 39, tf.bfloat16, 2))
......
......@@ -32,10 +32,12 @@ def _create_fake_dataset(output_path, seq_length, num_classes, num_examples):
writer = tf.io.TFRecordWriter(output_path)
def create_int_feature(values):
return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
return tf.train.Feature(
int64_list=tf.train.Int64List(value=np.ravel(values)))
def create_float_feature(values):
return tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
return tf.train.Feature(
float_list=tf.train.FloatList(value=np.ravel(values)))
for i in range(num_examples):
features = {}
......
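The switch from list(values) to np.ravel(values) lets these helpers accept nested or multi-dimensional values as well as flat lists. A quick standalone illustration with made-up values:
```python
import numpy as np
import tensorflow as tf

values = np.array([[1, 2], [3, 4]])  # e.g. per-example 2-D label data
feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=np.ravel(values)))
print(list(feature.int64_list.value))  # [1, 2, 3, 4]
# list(values) would yield row arrays instead, which Int64List cannot store.
```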
......@@ -20,8 +20,8 @@ import sys
from setuptools import find_packages
from setuptools import setup
version = '2.7.0'
tf_version = '2.7.0' # Major version.
version = '2.8.0'
tf_version = '2.8.0' # Major version.
project_name = 'tf-models-official'
......
......@@ -203,8 +203,7 @@ class BASNetTask(base_task.Task):
# For mixed_precision policy, when LossScaleOptimizer is used, loss is
# scaled for numerical stability.
if isinstance(
optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer):
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
scaled_loss = optimizer.get_scaled_loss(scaled_loss)
tvars = model.trainable_variables
......@@ -212,8 +211,7 @@ class BASNetTask(base_task.Task):
# Scales back gradient before apply_gradients when LossScaleOptimizer is
# used.
if isinstance(
optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer):
if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
grads = optimizer.get_unscaled_gradients(grads)
# Apply gradient clipping.
......
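These two hunks replace the deprecated tf.keras.mixed_precision.experimental.LossScaleOptimizer check with the stable API. As a reminder of the pattern being checked for, here is a minimal generic training step (a sketch, not the task's actual code):
```python
import tensorflow as tf

optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(learning_rate=0.01))

def train_step(model, features, labels, loss_fn):
  with tf.GradientTape() as tape:
    loss = loss_fn(labels, model(features, training=True))
    # Scale the loss so small gradients survive float16 underflow.
    scaled_loss = optimizer.get_scaled_loss(loss)
  scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
  # Undo the scaling before clipping and applying the gradients.
  grads = optimizer.get_unscaled_gradients(scaled_grads)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss
```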
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Generates example dataset for post-training quantization.
Example command line to run the script:
```shell
python3 quantize_movinet.py \
--saved_model_dir=${SAVED_MODEL_DIR} \
--saved_model_with_states_dir=${SAVED_MODEL_WITH_STATES_DIR} \
--output_dataset_dir=${OUTPUT_DATASET_DIR} \
--output_tflite=${OUTPUT_TFLITE} \
--quantization_mode='int_float_fallback' \
--save_dataset_to_tfrecords=True
```
"""
import functools
from typing import Any, Callable, Mapping, Optional
from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_hub as hub
from official.vision.beta.configs import video_classification as video_classification_configs
from official.vision.beta.tasks import video_classification
tf.enable_v2_behavior()
FLAGS = flags.FLAGS
flags.DEFINE_string(
'saved_model_dir', None, 'The saved_model directory.')
flags.DEFINE_string(
'saved_model_with_states_dir', None,
'The directory to the saved_model with state signature. '
'The saved_model_with_states is needed in order to get the initial state '
'shape and dtype while saved_model is used for the quantization.')
flags.DEFINE_string(
'output_tflite', '/tmp/output.tflite',
'The output tflite file path.')
flags.DEFINE_integer(
'temporal_stride', 5,
'Temporal stride used to generate input videos.')
flags.DEFINE_integer(
'num_frames', 50, 'Number of frames in input videos.')
flags.DEFINE_integer(
'image_size', 172, 'Frame size of input videos.')
flags.DEFINE_string(
'quantization_mode', None,
'The quantization mode. Can be one of "float16", "int8",'
'"int_float_fallback" or None.')
flags.DEFINE_integer(
'num_calibration_videos', 100,
'Number of videos to run to generate example datasets.')
flags.DEFINE_integer(
'num_samples_per_video', 3,
'Number of samples drawn from a single video.')
flags.DEFINE_boolean(
'save_dataset_to_tfrecords', False,
'Whether to save representative dataset to the disk.')
flags.DEFINE_string(
'output_dataset_dir', '/tmp/representative_dataset/',
'The directory to store exported tfrecords.')
flags.DEFINE_integer(
'max_saved_files', 100,
'The maximum number of tfrecord files to save.')
def _bytes_feature(value):
"""Returns a bytes_list from a string / byte."""
if isinstance(value, type(tf.constant(0))):
value = value.numpy() # BytesList won't unpack string from an EagerTensor.
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float_feature(value):
"""Returns a float_list from a float / double."""
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _int64_feature(value):
"""Returns an int64_list from a bool / enum / int / uint."""
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _build_tf_example(feature):
return tf.train.Example(
features=tf.train.Features(feature=feature)).SerializeToString()
def save_to_tfrecord(input_frame: tf.Tensor,
input_states: Mapping[str, tf.Tensor],
frame_index: int,
predictions: tf.Tensor,
output_states: Mapping[str, tf.Tensor],
groundtruth_label_id: tf.Tensor,
output_dataset_dir: str,
file_index: int):
"""Save results to tfrecord."""
features = {}
features['frame_id'] = _int64_feature([frame_index])
features['groundtruth_label'] = _int64_feature(
groundtruth_label_id.numpy().flatten().tolist())
features['predictions'] = _float_feature(
predictions.numpy().flatten().tolist())
image_string = tf.io.encode_png(
tf.squeeze(tf.cast(input_frame * 255., tf.uint8), axis=[0, 1]))
features['image'] = _bytes_feature(image_string.numpy())
# Input/Output states at time T
for k, v in output_states.items():
dtype = v[0].dtype
if dtype == tf.int32:
features['input/' + k] = _int64_feature(
input_states[k].numpy().flatten().tolist())
features['output/' + k] = _int64_feature(
output_states[k].numpy().flatten().tolist())
elif dtype == tf.float32:
features['input/' + k] = _float_feature(
input_states[k].numpy().flatten().tolist())
features['output/' + k] = _float_feature(
output_states[k].numpy().flatten().tolist())
else:
raise ValueError(f'Unrecognized dtype: {dtype}')
tfe = _build_tf_example(features)
record_file = '{}/movinet_stream_{:06d}.tfrecords'.format(
output_dataset_dir, file_index)
logging.info('Saving to %s.', record_file)
with tf.io.TFRecordWriter(record_file) as writer:
writer.write(tfe)
def get_dataset() -> tf.data.Dataset:
"""Gets dataset source."""
config = video_classification_configs.video_classification_kinetics600()
temporal_stride = FLAGS.temporal_stride
num_frames = FLAGS.num_frames
image_size = FLAGS.image_size
feature_shape = (num_frames, image_size, image_size, 3)
config.task.validation_data.global_batch_size = 1
config.task.validation_data.feature_shape = feature_shape
config.task.validation_data.temporal_stride = temporal_stride
config.task.train_data.min_image_size = int(1.125 * image_size)
config.task.validation_data.dtype = 'float32'
config.task.validation_data.drop_remainder = False
task = video_classification.VideoClassificationTask(config.task)
valid_dataset = task.build_inputs(config.task.validation_data)
valid_dataset = valid_dataset.map(lambda x, y: (x['image'], y))
valid_dataset = valid_dataset.prefetch(32)
return valid_dataset
def stateful_representative_dataset_generator(
model: tf.keras.Model,
dataset_iter: Any,
init_states: Mapping[str, tf.Tensor],
save_dataset_to_tfrecords: bool = False,
max_saved_files: int = 100,
output_dataset_dir: Optional[str] = None,
num_samples_per_video: int = 3,
num_calibration_videos: int = 100):
"""Generates sample input data with states.
Args:
model: the inference keras model.
dataset_iter: the dataset source.
init_states: the initial states for the model.
save_dataset_to_tfrecords: whether to save the representative dataset to
tfrecords on disk.
max_saved_files: the max number of saved tfrecords files.
output_dataset_dir: the directory to store the saved tfrecords.
num_samples_per_video: number of randomly sampled frames per video.
num_calibration_videos: number of calibration videos to run.
Yields:
A dictionary of model inputs.
"""
counter = 0
for i in range(num_calibration_videos):
if i % 100 == 0:
logging.info('Reading representative dataset id %d.', i)
example_input, example_label = next(dataset_iter)
groundtruth_label_id = tf.argmax(example_label, axis=-1)
input_states = init_states
# split video into frames along the temporal dimension.
frames = tf.split(example_input, example_input.shape[1], axis=1)
random_indices = np.random.randint(
low=1, high=len(frames), size=num_samples_per_video)
# always include the first frame
random_indices[0] = 0
random_indices = set(random_indices)
for frame_index, frame in enumerate(frames):
predictions, output_states = model({'image': frame, **input_states})
if frame_index in random_indices:
if save_dataset_to_tfrecords and counter < max_saved_files:
save_to_tfrecord(
input_frame=frame,
input_states=input_states,
frame_index=frame_index,
predictions=predictions,
output_states=output_states,
groundtruth_label_id=groundtruth_label_id,
output_dataset_dir=output_dataset_dir,
file_index=counter)
yield {'image': frame, **input_states}
counter += 1
# update states for the next inference step
input_states = output_states
def get_tflite_converter(
saved_model_dir: str,
quantization_mode: str,
representative_dataset: Optional[Callable[..., Any]] = None
) -> tf.lite.TFLiteConverter:
"""Gets tflite converter."""
converter = tf.lite.TFLiteConverter.from_saved_model(
saved_model_dir=saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
if quantization_mode == 'float16':
logging.info('Using float16 quantization.')
converter.target_spec.supported_types = [tf.float16]
elif quantization_mode == 'int8':
logging.info('Using full integer quantization.')
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
elif quantization_mode == 'int_float_fallback':
logging.info('Using integer quantization with floating-point fallback.')
converter.representative_dataset = representative_dataset
else:
logging.info('Using dynamic range quantization.')
return converter
def quantize_movinet(dataset_fn):
"""Quantizes Movinet."""
valid_dataset = dataset_fn()
dataset_iter = iter(valid_dataset)
# Load model
encoder = hub.KerasLayer(FLAGS.saved_model_with_states_dir, trainable=False)
inputs = tf.keras.layers.Input(
shape=[1, FLAGS.image_size, FLAGS.image_size, 3],
dtype=tf.float32,
name='image')
# Define the state inputs: a dict that maps state names to tensors.
init_states_fn = encoder.resolved_object.signatures['init_states']
state_shapes = {
name: ([s if s > 0 else None for s in state.shape], state.dtype)
for name, state in init_states_fn(
tf.constant([1, 1, FLAGS.image_size, FLAGS.image_size, 3])).items()
}
states_input = {
name: tf.keras.Input(shape[1:], dtype=dtype, name=name)
for name, (shape, dtype) in state_shapes.items()
}
# The inputs to the model are the states and the video
inputs = {**states_input, 'image': inputs}
outputs = encoder(inputs)
model = tf.keras.Model(inputs, outputs, name='movinet_stream')
input_shape = tf.constant(
[1, FLAGS.num_frames, FLAGS.image_size, FLAGS.image_size, 3])
init_states = init_states_fn(input_shape)
# Configure the representative dataset function.
representative_dataset = functools.partial(
stateful_representative_dataset_generator,
model=model,
dataset_iter=dataset_iter,
init_states=init_states,
save_dataset_to_tfrecords=FLAGS.save_dataset_to_tfrecords,
max_saved_files=FLAGS.max_saved_files,
output_dataset_dir=FLAGS.output_dataset_dir,
num_samples_per_video=FLAGS.num_samples_per_video,
num_calibration_videos=FLAGS.num_calibration_videos)
converter = get_tflite_converter(
saved_model_dir=FLAGS.saved_model_dir,
quantization_mode=FLAGS.quantization_mode,
representative_dataset=representative_dataset)
logging.info('Converting...')
tflite_buffer = converter.convert()
return tflite_buffer
def main(_):
tflite_buffer = quantize_movinet(dataset_fn=get_dataset)
with open(FLAGS.output_tflite, 'wb') as f:
f.write(tflite_buffer)
logging.info('tflite model written to %s', FLAGS.output_tflite)
if __name__ == '__main__':
flags.mark_flag_as_required('saved_model_dir')
flags.mark_flag_as_required('saved_model_with_states_dir')
app.run(main)
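To sanity-check the converted model, one can load it with the TFLite interpreter and run a single inference on zero-filled inputs. This is a hedged sketch: it assumes the flag's default output path, and a streaming MoViNet expects the image plus all of its state tensors as inputs.
```python
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='/tmp/output.tflite')  # default --output_tflite
interpreter.allocate_tensors()
for detail in interpreter.get_input_details():
  # Zero-filled placeholders for the image and every state tensor.
  interpreter.set_tensor(detail['index'],
                         np.zeros(detail['shape'], dtype=detail['dtype']))
interpreter.invoke()
output = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
print(output.shape)
```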
# Quantization Aware Training Project for Computer Vision Models
[TOC]
⚠️ Disclaimer: All datasets hyperlinked from this page are not owned or
distributed by Google. These datasets are made available by third parties.
Please review the terms and conditions made available by the third parties
before using the data.
## Overview
This project includes quantization aware training code for computer vision
models. These examples show how to apply the Model Optimization Toolkit's
[quantization aware training API](https://www.tensorflow.org/model_optimization/guide/quantization/training).
Note: Currently, we support a limited number of ML tasks and models (e.g., image
classification and semantic segmentation). We will keep adding support for more
ML tasks and models in upcoming releases.
## How to train a model
```
EXPERIMENT=xxx # Change this for your run, for example, 'mobilenet_imagenet_qat'
CONFIG_FILE=xxx # Change this for your run, for example, path of imagenet_mobilenetv2_qat_gpu.yaml
MODEL_DIR=xxx # Change this for your run, for example, /tmp/model_dir
$ python3 train.py \
--experiment=${EXPERIMENT} \
--config_file=${CONFIG_FILE} \
--model_dir=${MODEL_DIR} \
--mode=train_and_eval
```
## Model Accuracy
<figure align="center">
<img width=70% src=https://storage.googleapis.com/tf_model_garden/models/qat/images/readme-qat-classification-plot.png>
<figcaption>Comparison of Imagenet top-1 accuracy for the classification models</figcaption>
</figure>
Note: The Top-1 model accuracy is measured on the validation set of [ImageNet](https://www.image-net.org/).
### Pre-trained Models
|Model |Resolution|Top-1 Accuracy (FP32)|Top-1 Accuracy (Int8/PTQ)|Top-1 Accuracy (Int8/QAT)|Config |Download |
|----------------------|----------|---------------------|-------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------|
|MobileNetV2 |224x224 |72.782% |72.392% |72.792% |[config](https://github.com/tensorflow/models/blob/master/official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu.yaml) |[TFLite(Int8/QAT)](https://storage.googleapis.com/tf_model_garden/vision/mobilenet/v2_1.0_int8/mobilenet_v2_1.00_224_int8.tflite) |
|ResNet50 |224x224 |76.710% |76.420% |77.200% |[config](https://github.com/tensorflow/models/blob/master/official/projects/qat/vision/configs/experiments/image_classification/imagenet_resnet50_qat_gpu.yaml) |[TFLite(Int8/QAT)](https://storage.googleapis.com/tf_model_garden/vision/resnet50_imagenet/resnet_50_224_int8.tflite) |
|MobileNetV3.5 MultiAVG|224x224 |75.212% |74.122% |75.130% |[config](https://github.com/tensorflow/models/blob/master/official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv3.5_qat_gpu.yaml)|[TFLite(Int8/QAT)](https://storage.googleapis.com/tf_model_garden/vision/mobilenet/v3.5multiavg_1.0_int8/mobilenet_v3.5multiavg_1.00_224_int8.tflite)|
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Configs package definition."""
from official.projects.qat.vision.configs import image_classification
from official.projects.qat.vision.configs import semantic_segmentation
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Image classification configuration definition."""
import dataclasses
from typing import Optional
from official.modeling import hyperparams
@dataclasses.dataclass
class Quantization(hyperparams.Config):
"""Quantization parameters.
Attributes:
pretrained_original_checkpoint: A string indicate pretrained checkpoint
location.
change_num_bits: A `bool` indicates whether to manually allocate num_bits.
num_bits_weight: An `int` number of bits for weight. Default to 8.
num_bits_activation: An `int` number of bits for activation. Default to 8.
"""
pretrained_original_checkpoint: Optional[str] = None
change_num_bits: bool = False
num_bits_weight: int = 8
num_bits_activation: int = 8
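For reference, a hypothetical in-Python construction of this config; the checkpoint path is a placeholder, and in practice these values come from YAML experiment files such as the ones below.
```python
# Hypothetical usage only; the checkpoint path is a placeholder.
quantization = Quantization(
    pretrained_original_checkpoint='gs://your-bucket/path/to/ckpt',
    change_num_bits=True,   # manually allocate bit widths
    num_bits_weight=4,      # illustrative non-default value
    num_bits_activation=8)
```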
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV2'
filter_size_scale: 1.0
dropout_rate: 0.1
losses:
l2_weight_decay: 0.0000001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 512 # 64 * 8
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 512 # 64 * 8
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/mobilenetv2_gpu/22984194/ckpt-625500'
trainer:
# With the settings below, QAT reaches a top-1 accuracy of 0.7279 after 43 hours on 8 GPUs.
train_steps: 250200
validation_steps: 98
validation_interval: 2502
steps_per_loop: 2502
summary_interval: 2502
checkpoint_interval: 2502
optimizer_config:
learning_rate:
type: 'exponential'
exponential:
decay_rate: 0.9
decay_steps: 1251
initial_learning_rate: 0.0001
name: 'ExponentialDecay'
offset: 0
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 0
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV2'
filter_size_scale: 1.0
dropout_rate: 0.0 # changed from 0.2 to 0.0
losses:
l2_weight_decay: 0.0000001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 256
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 256
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/mobilenetv2_gpu/22984194/ckpt-625500'
trainer:
# With the settings below, QAT reaches a top-1 accuracy of 0.7251 at 420336 steps after
# 1 day 19 hours of training on 8 GPUs, which is higher than the PTQ result for MobileNetV2.
train_steps: 1000800 # 200 epochs
validation_steps: 196 # NUM_EXAMPLES (50000) // global_batch_size (256)
validation_interval: 5004 # 1 epoch
steps_per_loop: 5004 # NUM_EXAMPLES (1281167) // global_batch_size (256)
summary_interval: 5004 # 1 epoch
checkpoint_interval: 5004 # 1 epoch
max_to_keep: 200
optimizer_config:
learning_rate:
type: 'exponential'
exponential:
initial_learning_rate: 0.0001
decay_steps: 1251 # steps_per_epoch // 4
decay_rate: 0.96
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 0
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV2'
filter_size_scale: 1.0
dropout_rate: 0.0 # changed from 0.2 to 0.0
losses:
l2_weight_decay: 0.0000001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 512
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 512
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/mobilenetv2_gpu/22984194/ckpt-625500'
trainer:
# With the settings below, QAT reaches a top-1 accuracy of 0.7266 at 312750 steps after
# 1 day 22 hours of training on 8 GPUs, which is higher than the PTQ result for MobileNetV2.
train_steps: 500400 # 200 epochs
validation_steps: 98 # NUM_EXAMPLES (50000) // global_batch_size (512)
validation_interval: 2502 # 1 epoch
steps_per_loop: 2502 # NUM_EXAMPLES (1281167) // global_batch_size (512)
summary_interval: 2502 # 1 epoch
checkpoint_interval: 2502 # 1 epoch
max_to_keep: 200
optimizer_config:
learning_rate:
type: 'exponential'
exponential:
initial_learning_rate: 0.0002
decay_steps: 1251 # steps_per_epoch // 2
decay_rate: 0.96
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 0
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetMultiAVG'
filter_size_scale: 1.0
dropout_rate: 0.3
losses:
l2_weight_decay: 0.000001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 512
dtype: 'float32'
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 512
dtype: 'float32'
drop_remainder: false
quantization:
pretrained_original_checkpoint: 'gs://**/tf2_mhave_nobias_bn_aug05/28334857/ckpt-156000'
trainer:
# With the settings below, QAT reaches a top-1 accuracy of 0.7513 after 30 hours on 8 GPUs.
train_steps: 250200
validation_steps: 98
validation_interval: 2502
steps_per_loop: 2502
summary_interval: 2502
checkpoint_interval: 2502
optimizer_config:
learning_rate:
type: 'exponential'
exponential:
decay_rate: 0.9
decay_steps: 1251
initial_learning_rate: 0.0004
name: 'ExponentialDecay'
offset: 0
staircase: true
warmup:
type: 'linear'
linear:
warmup_steps: 0
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'mobilenet'
mobilenet:
model_id: 'MobileNetV3Large'
filter_size_scale: 1.0
dropout_rate: 0.3
losses:
l2_weight_decay: 1.0e-06 # 1/10 of original value.
one_hot: true
label_smoothing: 0.1
train_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 4096
dtype: 'float32'
aug_rand_hflip: true
aug_type:
autoaug:
augmentation_name: v0
cutout_const: 100
translate_const: 250
type: autoaug
drop_remainder: true
validation_data:
input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 4096
dtype: 'float32'
drop_remainder: false
aug_rand_hflip: true
quantization:
pretrained_original_checkpoint: 'gs://**/mobilenetv3_baseline_31/ckpt-156000'
trainer:
# With the settings below, QAT reaches a top-1 accuracy of 0.7443 after ~2 hours on a 4x4 DF.
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
learning_rate:
cosine:
alpha: 0.0
decay_steps: 62400
initial_learning_rate: 0.0003 # 1/10 of original lr.
name: CosineDecay
offset: 0
type: cosine
optimizer:
adamw:
amsgrad: false
beta_1: 0.9
beta_2: 0.999
epsilon: 1.0e-07
gradient_clip_norm: 1.0
weight_decay_rate: 0.0
type: adamw
warmup:
type: 'linear'
linear:
warmup_steps: 0