ModelZoo / ResNet50_tensorflow / Commits / 7479dbb8

Unverified commit 7479dbb8, authored Feb 15, 2022 by Srihari Humbarwadi, committed by GitHub on Feb 15, 2022.

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

Parents: 8b60a5a8, 9c8cbd0c
Changes: 127 files in the commit; this page shows 20 changed files with 862 additions and 25 deletions (+862, -25).
official/legacy/detection/modeling/learning_rates.py (+1, -1)
official/legacy/image_classification/learning_rate.py (+1, -1)
official/modeling/optimization/configs/learning_rate_config.py (+3, -3)
official/modeling/optimization/lr_schedule.py (+5, -5)
official/modeling/optimization/optimizer_factory.py (+1, -1)
official/nlp/data/create_finetuning_data.py (+0, -1)
official/nlp/modeling/models/t5.py (+54, -5)
official/nlp/modeling/models/t5_test.py (+70, -0)
official/nlp/tasks/sentence_prediction_test.py (+4, -2)
official/pip_package/setup.py (+2, -2)
official/projects/basnet/tasks/basnet.py (+2, -4)
official/projects/movinet/tools/quantize_movinet.py (+331, -0)
official/projects/qat/vision/README.md (+50, -0)
official/projects/qat/vision/configs/__init__.py (+19, -0)
official/projects/qat/vision/configs/common.py (+38, -0)
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu.yaml (+53, -0)
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu_batch256.yaml (+53, -0)
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu_batch512.yaml (+53, -0)
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv3.5_qat_gpu.yaml (+53, -0)
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv3large_qat_tpu.yaml (+69, -0)
official/legacy/detection/modeling/learning_rates.py

@@ -61,7 +61,7 @@ class CosineLearningRateWithLinearWarmup(
   """Class to generate learning rate tensor."""

   def __init__(self, total_steps, params):
-    """Creates the consine learning rate tensor with linear warmup."""
+    """Creates the cosine learning rate tensor with linear warmup."""
     super(CosineLearningRateWithLinearWarmup, self).__init__()
     self._total_steps = total_steps
     assert isinstance(params, (dict, params_dict.ParamsDict))
official/legacy/image_classification/learning_rate.py

@@ -78,7 +78,7 @@ class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
   """Class to generate learning rate tensor."""

   def __init__(self, batch_size: int, total_steps: int, warmup_steps: int):
-    """Creates the consine learning rate tensor with linear warmup.
+    """Creates the cosine learning rate tensor with linear warmup.

     Args:
       batch_size: The training batch size used in the experiment.
official/modeling/optimization/configs/learning_rate_config.py

@@ -216,14 +216,14 @@ class StepCosineLrConfig(base_config.Config):
   """Configuration for stepwise learning rate decay.

   This class is a container for the piecewise cosine learning rate scheduling
-  configs. It will configure an instance of StepConsineDecayWithOffset keras
+  configs. It will configure an instance of StepCosineDecayWithOffset keras
   learning rate schedule.

   ```python
   boundaries: [100000, 110000]
   values: [1.0, 0.5]
   lr_decayed_fn = (
-    lr_schedule.StepConsineDecayWithOffset(
+    lr_schedule.StepCosineDecayWithOffset(
       boundaries,
       values))
   ```

@@ -243,7 +243,7 @@ class StepCosineLrConfig(base_config.Config):
       [boundaries[n], end] -> values[n+1] to 0.
     offset: An int. The offset applied to steps. Defaults to 0.
   """
-  name: str = 'StepConsineDecayWithOffset'
+  name: str = 'StepCosineDecayWithOffset'
   boundaries: Optional[List[int]] = None
   values: Optional[List[float]] = None
   offset: int = 0
official/modeling/optimization/lr_schedule.py

@@ -386,11 +386,11 @@ class PowerDecayWithOffset(tf.keras.optimizers.schedules.LearningRateSchedule):
     }


-class StepConsineDecayWithOffset(
+class StepCosineDecayWithOffset(
     tf.keras.optimizers.schedules.LearningRateSchedule):
   """Stepwise cosine learning rate decay with offset.

-  Learning rate is equivalent to one or more consine decay(s) starting and
+  Learning rate is equivalent to one or more cosine decay(s) starting and
   ending at each interval.

   Example:

@@ -399,7 +399,7 @@ class StepConsineDecayWithOffset(
   boundaries: [100000, 110000]
   values: [1.0, 0.5]
   lr_decayed_fn = (
-    lr_schedule.StepConsineDecayWithOffset(
+    lr_schedule.StepCosineDecayWithOffset(
       boundaries,
       values))
   ```

@@ -412,7 +412,7 @@ class StepConsineDecayWithOffset(
                boundaries,
                values,
                offset: int = 0,
-               name: str = "StepConsineDecayWithOffset"):
+               name: str = "StepCosineDecayWithOffset"):
     """Initialize configuration of the learning rate schedule.

     Args:

@@ -444,7 +444,7 @@ class StepConsineDecayWithOffset(
                          ] + [0])

   def __call__(self, global_step):
-    with tf.name_scope(self.name or "StepConsineDecayWithOffset"):
+    with tf.name_scope(self.name or "StepCosineDecayWithOffset"):
       global_step = tf.cast(global_step - self.offset, tf.float32)
       lr_levels = self.values
       lr_steps = self.boundaries
official/modeling/optimization/optimizer_factory.py

@@ -47,7 +47,7 @@ LR_CLS = {
     'power': lr_schedule.DirectPowerDecay,
     'power_linear': lr_schedule.PowerAndLinearDecay,
     'power_with_offset': lr_schedule.PowerDecayWithOffset,
-    'step_cosine_with_offset': lr_schedule.StepConsineDecayWithOffset,
+    'step_cosine_with_offset': lr_schedule.StepCosineDecayWithOffset,
 }

 WARMUP_CLS = {
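For reference, a minimal sketch (not part of this commit) of the renamed schedule in use. The boundaries and values are copied from the docstring example in learning_rate_config.py above; 'step_cosine_with_offset' is the LR_CLS key that now resolves to the renamed class.

```python
import tensorflow as tf

from official.modeling.optimization import lr_schedule

# Boundary/value pairs taken from the StepCosineLrConfig docstring example.
schedule = lr_schedule.StepCosineDecayWithOffset(
    boundaries=[100000, 110000],
    values=[1.0, 0.5])

# The schedule is callable on a global step, like any Keras LearningRateSchedule.
lr = schedule(tf.constant(50000))  # cosine decay within the first interval
```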
official/nlp/data/create_finetuning_data.py

@@ -33,7 +33,6 @@ from official.nlp.tools import tokenization

 FLAGS = flags.FLAGS

-# TODO(chendouble): consider moving each task to its own binary.
 flags.DEFINE_enum(
     "fine_tuning_task_type", "classification",
     ["classification", "regression", "squad", "retrieval", "tagging"],
official/nlp/modeling/models/t5.py

@@ -1086,12 +1086,17 @@ class Encoder(Module):
     self.output_dropout = Dropout(self.config.dropout_rate,)

   @tf.Module.with_name_scope
-  def __call__(self, inputs, encoder_mask=None, training=False):
+  def __call__(self,
+               inputs,
+               encoder_mask=None,
+               dense_inputs=None,
+               training=False):
     """Applies Transformer model on the inputs.

     Args:
       inputs: input data
       encoder_mask: the encoder self-attention mask.
+      dense_inputs: dense input data, concat after the embedding.
       training: whether it is training pass, affecting dropouts.

     Returns:

@@ -1102,11 +1107,20 @@ class Encoder(Module):
       encoder_mask = tf.cast(encoder_mask, self.compute_dtype)
     cfg = self.config
     x = self.input_embed(inputs, one_hot=cfg.one_hot_embedding)
+    if dense_inputs is not None:
+      x = tf.concat([x, dense_inputs], axis=1)
     tensor_shape = tf_utils.get_shape_list(x)
     tensor_shape[-2] = 1
     x = self.input_dropout(x, noise_shape=tensor_shape, training=training)

     input_length = tf_utils.get_shape_list(inputs)[1]
     position_bias = self.relative_embedding(input_length, input_length)
+    if dense_inputs is not None:
+      # Here we ignore relative position bias for dense embeddings.
+      dense_input_length = tf_utils.get_shape_list(dense_inputs)[1]
+      # Position bias shape: [batch, 1, len, len]
+      paddings = tf.constant([[0, 0], [0, 0], [0, dense_input_length],
+                              [0, dense_input_length]])
+      position_bias = tf.pad(position_bias, paddings, "CONSTANT")
     for i in range(cfg.num_layers):
       x = self.encoder_layers[i](

@@ -1308,31 +1322,56 @@ class T5Transformer(Module):
   def encode(self,
              encoder_input_tokens,
              encoder_segment_ids=None,
+             encoder_dense_inputs=None,
+             encoder_dense_segment_ids=None,
              training=False):
     eligible_positions = tf.cast(
         tf.not_equal(encoder_input_tokens, 0), self.compute_dtype)
+    if encoder_dense_inputs is not None:
+      eligible_dense_position = tf.cast(
+          tf.reduce_any(tf.not_equal(encoder_dense_inputs, 0), axis=-1),
+          self.compute_dtype)
+      eligible_positions = tf.concat(
+          [eligible_positions, eligible_dense_position], axis=1)

     encoder_mask = make_attention_mask(
         eligible_positions, eligible_positions, dtype=tf.bool)

     if encoder_segment_ids is not None:
+      if encoder_dense_segment_ids is not None:
+        encoder_segment_ids = tf.concat(
+            [encoder_segment_ids, encoder_dense_segment_ids], axis=1)
       segment_mask = make_attention_mask(
           encoder_segment_ids, encoder_segment_ids, tf.equal, dtype=tf.bool)
       encoder_mask = tf.math.logical_and(encoder_mask, segment_mask)

     encoder_mask = (1.0 - tf.cast(encoder_mask, self.compute_dtype)) * -1e9
-    return self.encoder(encoder_input_tokens, encoder_mask, training=training)
+    return self.encoder(
+        encoder_input_tokens,
+        encoder_mask,
+        encoder_dense_inputs,
+        training=training)

   def decode(
       self,
       encoded,
       decoder_target_tokens,
       encoder_input_tokens,  # only used for masks
+      encoder_dense_inputs=None,
       decoder_input_tokens=None,
       encoder_segment_ids=None,
+      encoder_dense_segment_ids=None,
       decoder_segment_ids=None,
       decode_position=None,
       cache=None,
       max_decode_len=None,
       decode=False,
       training=False):
+    eligible_inputs = tf.cast(
+        tf.not_equal(encoder_input_tokens, 0), self.compute_dtype)
+    if encoder_dense_inputs is not None:
+      eligible_dense_inputs = tf.cast(
+          tf.reduce_any(tf.not_equal(encoder_dense_inputs, 0), axis=-1),
+          self.compute_dtype)
+      eligible_inputs = tf.concat(
+          [eligible_inputs, eligible_dense_inputs], axis=1)
     if decode:
       # For decoding, the decoder_input_tokens is the decoder_target_tokens.
       decoder_input_tokens = decoder_target_tokens

@@ -1342,14 +1381,12 @@ class T5Transformer(Module):
           tf.cast(
               tf.not_equal(tf.ones_like(decoder_target_tokens), 0),
               self.compute_dtype),
-          tf.cast(tf.not_equal(encoder_input_tokens, 0), self.compute_dtype),
+          eligible_inputs,
           dtype=tf.bool)
     else:
       # Note that, masks should be created using decoder_target_tokens.
       eligible_targets = tf.cast(
           tf.not_equal(decoder_target_tokens, 0), self.compute_dtype)
-      eligible_inputs = tf.cast(
-          tf.not_equal(encoder_input_tokens, 0), self.compute_dtype)
       decoder_mask = tf.math.logical_and(
           make_attention_mask(eligible_targets, eligible_targets,
                               dtype=tf.bool),

@@ -1365,6 +1402,9 @@ class T5Transformer(Module):
               decoder_segment_ids, tf.equal, dtype=tf.bool))
+      if encoder_dense_segment_ids is not None:
+        encoder_segment_ids = tf.concat(
+            [encoder_segment_ids, encoder_dense_segment_ids], axis=1)
       encoder_decoder_mask = tf.math.logical_and(
           encoder_decoder_mask,
           make_attention_mask(

@@ -1392,6 +1432,8 @@ class T5Transformer(Module):
   def __call__(self,
                encoder_input_tokens,
                decoder_target_tokens,
+               encoder_dense_inputs=None,
+               encoder_dense_segment_ids=None,
                decoder_input_tokens=None,
                encoder_segment_ids=None,
                decoder_segment_ids=None,

@@ -1401,9 +1443,12 @@ class T5Transformer(Module):
     Args:
       encoder_input_tokens: input tokens to the encoder.
       decoder_target_tokens: target tokens to the decoder.
+      encoder_dense_inputs: input dense vectors to the encoder.
+      encoder_dense_segment_ids: dense input segmentation info for packed
+        examples.
       decoder_input_tokens: input tokens to the decoder, only required for
         training.
       encoder_segment_ids: input segmentation info for packed examples.
       decoder_segment_ids: target segmentation info for packed examples.
       training: whether it is training pass, affecting dropouts.

@@ -1413,13 +1458,17 @@ class T5Transformer(Module):
     encoded = self.encode(
         encoder_input_tokens,
         encoder_segment_ids=encoder_segment_ids,
+        encoder_dense_inputs=encoder_dense_inputs,
+        encoder_dense_segment_ids=encoder_dense_segment_ids,
         training=training)
     outputs = self.decode(
         encoded=encoded,
         decoder_target_tokens=decoder_target_tokens,
         encoder_input_tokens=encoder_input_tokens,  # only used for masks.
+        encoder_dense_inputs=encoder_dense_inputs,  # only used for masks.
         decoder_input_tokens=decoder_input_tokens,
         encoder_segment_ids=encoder_segment_ids,
+        encoder_dense_segment_ids=encoder_dense_segment_ids,
         decoder_segment_ids=decoder_segment_ids,
         training=training)
     outputs["encoded"] = encoded
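For reference, a minimal sketch (not part of this commit) of the new dense-input path through the encoder; the config and shapes mirror test_encoder_with_dense in the test file below. Dense vectors are concatenated after the token embedding, so 8 tokens plus 2 dense positions yield a sequence length of 10.

```python
import tensorflow as tf

from official.nlp.modeling.models import t5

config = t5.T5TransformerParams(
    num_layers=2,
    d_model=4,
    d_kv=3,
    num_heads=4,
    d_ff=16,
    vocab_size=10,
    vocab_embeddings_initializer=tf.keras.initializers.Ones(),
    relative_embeddings_initializer=tf.keras.initializers.Ones())
encoder = t5.Encoder(config, compute_dtype=tf.float32)

tokens = tf.zeros((4, 8), dtype=tf.int32)      # [batch, token_len]
dense = tf.ones((4, 2, 4), dtype=tf.float32)   # [batch, dense_len, d_model]
encoded = encoder(tokens, dense_inputs=dense)  # shape: (4, 10, config.d_model)
```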
official/nlp/modeling/models/t5_test.py

@@ -354,6 +354,24 @@ class T5Test(tf.test.TestCase, parameterized.TestCase):
     encoded = encoder(tf.zeros((4, 8), dtype=tf.int32))
     self.assertEqual(encoded.shape, (4, 8, config.d_model))

+  @parameterized.named_parameters(("bfloat16", tf.bfloat16),
+                                  ("float32", tf.float32))
+  def test_encoder_with_dense(self, dtype):
+    config = t5.T5TransformerParams(
+        num_layers=2,
+        d_model=4,
+        d_kv=3,
+        num_heads=4,
+        d_ff=16,
+        vocab_size=10,
+        vocab_embeddings_initializer=tf.keras.initializers.Ones(),
+        relative_embeddings_initializer=tf.keras.initializers.Ones())
+    encoder = t5.Encoder(config, compute_dtype=dtype)
+    encoded = encoder(
+        tf.zeros((4, 8), dtype=tf.int32),
+        dense_inputs=tf.ones((4, 2, 4), dtype=dtype))
+    self.assertEqual(encoded.shape, (4, 10, config.d_model))
+
   def test_decoder(self):
     max_decode_len = 10
     config = t5.T5TransformerParams(

@@ -445,6 +463,58 @@ class T5Test(tf.test.TestCase, parameterized.TestCase):
       print(v.name, v.shape)
       self.assertEqual(v.dtype, tf.float32)

+  @parameterized.named_parameters(
+      ("t5_10", ("relu",), True, 26, False, tf.float32),)
+  def test_transformer_with_dense(self, ffn_activations, logits_via_embedding,
+                                  expect_num_variables, layer_sharing, dtype):
+    max_decode_len = 10
+    config = t5.T5TransformerParams(
+        num_layers=1,
+        d_model=8,
+        d_kv=4,
+        num_heads=4,
+        d_ff=32,
+        vocab_size=10,
+        shared_embedding=True,
+        layer_sharing=layer_sharing,
+        ffn_activations=ffn_activations,
+        logits_via_embedding=logits_via_embedding)
+    transformer = t5.T5Transformer(config, compute_dtype=dtype)
+    self.assertLen(transformer.trainable_variables, expect_num_variables)
+    inputs = tf.convert_to_tensor(
+        np.array([[2, 2, 1, 3, 1, 0], [3, 3, 1, 2, 2, 1]]))
+    segments = tf.convert_to_tensor(
+        np.array([[1, 1, 1, 2, 2, 0], [1, 1, 1, 2, 2, 2]]))
+    dense_inputs = tf.convert_to_tensor(np.random.randn(2, 2, 8), dtype=dtype)
+    dense_segments = tf.convert_to_tensor(np.array([[1, 2], [1, 2]]))
+    outputs = transformer(
+        encoder_input_tokens=inputs,
+        encoder_dense_inputs=dense_inputs,
+        decoder_input_tokens=inputs,
+        decoder_target_tokens=inputs,
+        encoder_segment_ids=segments,
+        encoder_dense_segment_ids=dense_segments,
+        decoder_segment_ids=segments)
+    cache = {}
+    batch_size = 2
+    cache[0] = _create_cache(
+        batch_size, max_decode_len, config.num_heads, config.d_kv, dtype=dtype)
+    outputs = transformer.decode(
+        encoder_input_tokens=inputs,
+        encoder_dense_inputs=dense_inputs,
+        encoded=outputs["encoded"],
+        decoder_target_tokens=tf.ones((batch_size, 1), dtype=tf.int32),
+        decode_position=1,
+        decode=True,
+        max_decode_len=max_decode_len,
+        cache=cache)
+    self.assertEqual(outputs["logits"].shape,
+                     (batch_size, 1, config.vocab_size))
+    for v in transformer.trainable_variables:
+      print(v.name, v.shape)
+      self.assertEqual(v.dtype, tf.float32)
+
   @parameterized.named_parameters(
       ("t5_10", ("relu",), True, 39, tf.float32, 2),
       ("t5_10_bfloat16", ("relu",), True, 39, tf.bfloat16, 2))
official/nlp/tasks/sentence_prediction_test.py

@@ -32,10 +32,12 @@ def _create_fake_dataset(output_path, seq_length, num_classes, num_examples):
   writer = tf.io.TFRecordWriter(output_path)

   def create_int_feature(values):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
+    return tf.train.Feature(
+        int64_list=tf.train.Int64List(value=np.ravel(values)))

   def create_float_feature(values):
-    return tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
+    return tf.train.Feature(
+        float_list=tf.train.FloatList(value=np.ravel(values)))

   for i in range(num_examples):
     features = {}
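A minimal sketch (not part of this commit) of why the helpers switch from list() to np.ravel(): Int64List and FloatList expect a flat sequence of scalars, and list() on a 2-D array yields row arrays that the proto typically rejects, while np.ravel flattens everything to scalars.

```python
import numpy as np
import tensorflow as tf

values = np.array([[1, 2], [3, 4]])  # hypothetical 2-D feature values
flat = np.ravel(values)              # -> array([1, 2, 3, 4])
feature = tf.train.Feature(int64_list=tf.train.Int64List(value=flat))
```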
official/pip_package/setup.py

@@ -20,8 +20,8 @@ import sys
 from setuptools import find_packages
 from setuptools import setup

-version = '2.7.0'
-tf_version = '2.7.0'  # Major version.
+version = '2.8.0'
+tf_version = '2.8.0'  # Major version.

 project_name = 'tf-models-official'
official/projects/basnet/tasks/basnet.py

@@ -203,8 +203,7 @@ class BASNetTask(base_task.Task):

       # For mixed_precision policy, when LossScaleOptimizer is used, loss is
       # scaled for numerical stability.
-      if isinstance(
-          optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer):
+      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
         scaled_loss = optimizer.get_scaled_loss(scaled_loss)

     tvars = model.trainable_variables

@@ -212,8 +211,7 @@ class BASNetTask(base_task.Task):

     # Scales back gradient before apply_gradients when LossScaleOptimizer is
     # used.
-    if isinstance(
-        optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer):
+    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
       grads = optimizer.get_unscaled_gradients(grads)

     # Apply gradient clipping.
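A minimal, self-contained sketch (not from this commit) of the non-experimental loss-scaling API the task now checks for; the variable and loss below are placeholders rather than anything from the BASNet task.

```python
import tensorflow as tf

var = tf.Variable(2.0)
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(learning_rate=0.1))

with tf.GradientTape() as tape:
  loss = var * var
  scaled_loss = optimizer.get_scaled_loss(loss)          # scale for stability
scaled_grads = tape.gradient(scaled_loss, [var])
grads = optimizer.get_unscaled_gradients(scaled_grads)   # unscale before apply
optimizer.apply_gradients(zip(grads, [var]))
```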
official/projects/movinet/tools/quantize_movinet.py (new file, +331)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Generates example dataset for post-training quantization.

Example command line to run the script:

```shell
python3 quantize_movinet.py \
  --saved_model_dir=${SAVED_MODEL_DIR} \
  --saved_model_with_states_dir=${SAVED_MODEL_WITH_STATES_DIR} \
  --output_dataset_dir=${OUTPUT_DATASET_DIR} \
  --output_tflite=${OUTPUT_TFLITE} \
  --quantization_mode='int_float_fallback' \
  --save_dataset_to_tfrecords=True
```
"""

import functools
from typing import Any, Callable, Mapping, Optional

from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_hub as hub

from official.vision.beta.configs import video_classification as video_classification_configs
from official.vision.beta.tasks import video_classification

tf.enable_v2_behavior()

FLAGS = flags.FLAGS

flags.DEFINE_string('saved_model_dir', None, 'The saved_model directory.')
flags.DEFINE_string(
    'saved_model_with_states_dir', None,
    'The directory to the saved_model with state signature. '
    'The saved_model_with_states is needed in order to get the initial state '
    'shape and dtype while saved_model is used for the quantization.')
flags.DEFINE_string('output_tflite', '/tmp/output.tflite',
                    'The output tflite file path.')
flags.DEFINE_integer('temporal_stride', 5,
                     'Temporal stride used to generate input videos.')
flags.DEFINE_integer('num_frames', 50, 'Input videos number of frames.')
flags.DEFINE_integer('image_size', 172, 'Input videos frame size.')
flags.DEFINE_string(
    'quantization_mode', None,
    'The quantization mode. Can be one of "float16", "int8", '
    '"int_float_fallback" or None.')
flags.DEFINE_integer('num_calibration_videos', 100,
                     'Number of videos to run to generate example datasets.')
flags.DEFINE_integer('num_samples_per_video', 3,
                     'Number of sample draw from one single video.')
flags.DEFINE_boolean('save_dataset_to_tfrecords', False,
                     'Whether to save representative dataset to the disk.')
flags.DEFINE_string('output_dataset_dir', '/tmp/representative_dataset/',
                    'The directory to store exported tfrecords.')
flags.DEFINE_integer('max_saved_files', 100,
                     'The maximum number of tfrecord files to save.')


def _bytes_feature(value):
  """Returns a bytes_list from a string / byte."""
  if isinstance(value, type(tf.constant(0))):
    value = value.numpy()  # BytesList won't unpack string from an EagerTensor.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _float_feature(value):
  """Returns a float_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def _build_tf_example(feature):
  return tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()


def save_to_tfrecord(input_frame: tf.Tensor,
                     input_states: Mapping[str, tf.Tensor],
                     frame_index: int,
                     predictions: tf.Tensor,
                     output_states: Mapping[str, tf.Tensor],
                     groundtruth_label_id: tf.Tensor,
                     output_dataset_dir: str,
                     file_index: int):
  """Save results to tfrecord."""
  features = {}
  features['frame_id'] = _int64_feature([frame_index])
  features['groundtruth_label'] = _int64_feature(
      groundtruth_label_id.numpy().flatten().tolist())
  features['predictions'] = _float_feature(
      predictions.numpy().flatten().tolist())
  image_string = tf.io.encode_png(
      tf.squeeze(tf.cast(input_frame * 255., tf.uint8), axis=[0, 1]))
  features['image'] = _bytes_feature(image_string.numpy())

  # Input/Output states at time T
  for k, v in output_states.items():
    dtype = v[0].dtype
    if dtype == tf.int32:
      features['input/' + k] = _int64_feature(
          input_states[k].numpy().flatten().tolist())
      features['output/' + k] = _int64_feature(
          output_states[k].numpy().flatten().tolist())
    elif dtype == tf.float32:
      features['input/' + k] = _float_feature(
          input_states[k].numpy().flatten().tolist())
      features['output/' + k] = _float_feature(
          output_states[k].numpy().flatten().tolist())
    else:
      raise ValueError(f'Unrecongized dtype: {dtype}')

  tfe = _build_tf_example(features)
  record_file = '{}/movinet_stream_{:06d}.tfrecords'.format(
      output_dataset_dir, file_index)
  logging.info('Saving to %s.', record_file)
  with tf.io.TFRecordWriter(record_file) as writer:
    writer.write(tfe)


def get_dataset() -> tf.data.Dataset:
  """Gets dataset source."""
  config = video_classification_configs.video_classification_kinetics600()

  temporal_stride = FLAGS.temporal_stride
  num_frames = FLAGS.num_frames
  image_size = FLAGS.image_size

  feature_shape = (num_frames, image_size, image_size, 3)
  config.task.validation_data.global_batch_size = 1
  config.task.validation_data.feature_shape = feature_shape
  config.task.validation_data.temporal_stride = temporal_stride
  config.task.train_data.min_image_size = int(1.125 * image_size)
  config.task.validation_data.dtype = 'float32'
  config.task.validation_data.drop_remainder = False
  task = video_classification.VideoClassificationTask(config.task)
  valid_dataset = task.build_inputs(config.task.validation_data)
  valid_dataset = valid_dataset.map(lambda x, y: (x['image'], y))
  valid_dataset = valid_dataset.prefetch(32)
  return valid_dataset


def stateful_representative_dataset_generator(
    model: tf.keras.Model,
    dataset_iter: Any,
    init_states: Mapping[str, tf.Tensor],
    save_dataset_to_tfrecords: bool = False,
    max_saved_files: int = 100,
    output_dataset_dir: Optional[str] = None,
    num_samples_per_video: int = 3,
    num_calibration_videos: int = 100):
  """Generates sample input data with states.

  Args:
    model: the inference keras model.
    dataset_iter: the dataset source.
    init_states: the initial states for the model.
    save_dataset_to_tfrecords: whether to save the representative dataset to
      tfrecords on disk.
    max_saved_files: the max number of saved tfrecords files.
    output_dataset_dir: the directory to store the saved tfrecords.
    num_samples_per_video: number of randomly sampled frames per video.
    num_calibration_videos: number of calibration videos to run.

  Yields:
    A dictionary of model inputs.
  """
  counter = 0
  for i in range(num_calibration_videos):
    if i % 100 == 0:
      logging.info('Reading representative dateset id %d.', i)
    example_input, example_label = next(dataset_iter)
    groundtruth_label_id = tf.argmax(example_label, axis=-1)

    input_states = init_states
    # split video into frames along the temporal dimension.
    frames = tf.split(example_input, example_input.shape[1], axis=1)
    random_indices = np.random.randint(
        low=1, high=len(frames), size=num_samples_per_video)
    # always include the first frame
    random_indices[0] = 0
    random_indices = set(random_indices)
    for frame_index, frame in enumerate(frames):
      predictions, output_states = model({'image': frame, **input_states})
      if frame_index in random_indices:
        if save_dataset_to_tfrecords and counter < max_saved_files:
          save_to_tfrecord(
              input_frame=frame,
              input_states=input_states,
              frame_index=frame_index,
              predictions=predictions,
              output_states=output_states,
              groundtruth_label_id=groundtruth_label_id,
              output_dataset_dir=output_dataset_dir,
              file_index=counter)
        yield {'image': frame, **input_states}
        counter += 1
      # update states for the next inference step
      input_states = output_states


def get_tflite_converter(
    saved_model_dir: str,
    quantization_mode: str,
    representative_dataset: Optional[Callable[..., Any]] = None
) -> tf.lite.TFLiteConverter:
  """Gets tflite converter."""
  converter = tf.lite.TFLiteConverter.from_saved_model(
      saved_model_dir=saved_model_dir)
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  if quantization_mode == 'float16':
    logging.info('Using float16 quantization.')
    converter.target_spec.supported_types = [tf.float16]
  elif quantization_mode == 'int8':
    logging.info('Using full interger quantization.')
    converter.representative_dataset = representative_dataset
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
  elif quantization_mode == 'int_float_fallback':
    logging.info('Using interger quantization with float-point fallback.')
    converter.representative_dataset = representative_dataset
  else:
    logging.info('Using dynamic range quantization.')
  return converter


def quantize_movinet(dataset_fn):
  """Quantizes Movinet."""
  valid_dataset = dataset_fn()
  dataset_iter = iter(valid_dataset)

  # Load model
  encoder = hub.KerasLayer(FLAGS.saved_model_with_states_dir, trainable=False)
  inputs = tf.keras.layers.Input(
      shape=[1, FLAGS.image_size, FLAGS.image_size, 3],
      dtype=tf.float32,
      name='image')

  # Define the state inputs, which is a dict that maps state names to tensors.
  init_states_fn = encoder.resolved_object.signatures['init_states']
  state_shapes = {
      name: ([s if s > 0 else None for s in state.shape], state.dtype)
      for name, state in init_states_fn(
          tf.constant([1, 1, FLAGS.image_size, FLAGS.image_size, 3])).items()
  }
  states_input = {
      name: tf.keras.Input(shape[1:], dtype=dtype, name=name)
      for name, (shape, dtype) in state_shapes.items()
  }

  # The inputs to the model are the states and the video
  inputs = {**states_input, 'image': inputs}
  outputs = encoder(inputs)
  model = tf.keras.Model(inputs, outputs, name='movinet_stream')

  input_shape = tf.constant(
      [1, FLAGS.num_frames, FLAGS.image_size, FLAGS.image_size, 3])
  init_states = init_states_fn(input_shape)

  # config representative_datset_fn
  representative_dataset = functools.partial(
      stateful_representative_dataset_generator,
      model=model,
      dataset_iter=dataset_iter,
      init_states=init_states,
      save_dataset_to_tfrecords=FLAGS.save_dataset_to_tfrecords,
      max_saved_files=FLAGS.max_saved_files,
      output_dataset_dir=FLAGS.output_dataset_dir,
      num_samples_per_video=FLAGS.num_samples_per_video,
      num_calibration_videos=FLAGS.num_calibration_videos)

  converter = get_tflite_converter(
      saved_model_dir=FLAGS.saved_model_dir,
      quantization_mode=FLAGS.quantization_mode,
      representative_dataset=representative_dataset)

  logging.info('Converting...')
  tflite_buffer = converter.convert()
  return tflite_buffer


def main(_):
  tflite_buffer = quantize_movinet(dataset_fn=get_dataset)

  with open(FLAGS.output_tflite, 'wb') as f:
    f.write(tflite_buffer)
  logging.info('tflite model written to %s', FLAGS.output_tflite)


if __name__ == '__main__':
  flags.mark_flag_as_required('saved_model_dir')
  flags.mark_flag_as_required('saved_model_with_states_dir')
  app.run(main)
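A minimal sketch (not part of this commit) of sanity-checking the exported model with the TFLite interpreter; '/tmp/output.tflite' is simply the default of the --output_tflite flag above.

```python
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='/tmp/output.tflite')
interpreter.allocate_tensors()
# Print the image and per-state input signatures the quantized model expects.
for detail in interpreter.get_input_details():
  print(detail['name'], detail['shape'], detail['dtype'])
```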
official/projects/qat/vision/README.md (new file, +50)

# Quantization Aware Training Project for Computer Vision Models

[TOC]

⚠️ Disclaimer: All datasets hyperlinked from this page are not owned or
distributed by Google. The dataset is made available by third parties.
Please review the terms and conditions made available by the third parties
before using the data.

## Overview

This project includes quantization aware training code for Computer Vision
models. These are examples to show how to apply the Model Optimization
Toolkit's [quantization aware training API](https://www.tensorflow.org/model_optimization/guide/quantization/training).

Note: Currently, we support a limited number of ML tasks & models (e.g., image
classification and semantic segmentation). We will keep adding support for
other ML tasks and models in the next releases.

## How to train a model

```
EXPERIMENT=xxx  # Change this for your run, for example, 'mobilenet_imagenet_qat'
CONFIG_FILE=xxx  # Change this for your run, for example, path of imagenet_mobilenetv2_qat_gpu.yaml
MODEL_DIR=xxx  # Change this for your run, for example, /tmp/model_dir

$ python3 train.py \
  --experiment=${EXPERIMENT} \
  --config_file=${CONFIG_FILE} \
  --model_dir=${MODEL_DIR} \
  --mode=train_and_eval
```

## Model Accuracy

<figure align="center">
  <img width=70% src=https://storage.googleapis.com/tf_model_garden/models/qat/images/readme-qat-classification-plot.png>
  <figcaption>Comparison of Imagenet top-1 accuracy for the classification models</figcaption>
</figure>

Note: The Top-1 model accuracy is measured on the validation set of
[ImageNet](https://www.image-net.org/).

### Pre-trained Models

| Model | Resolution | Top-1 Accuracy (FP32) | Top-1 Accuracy (Int8/PTQ) | Top-1 Accuracy (Int8/QAT) | Config | Download |
|---|---|---|---|---|---|---|
| MobileNetV2 | 224x224 | 72.782% | 72.392% | 72.792% | [config](https://github.com/tensorflow/models/blob/master/official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu.yaml) | [TFLite(Int8/QAT)](https://storage.googleapis.com/tf_model_garden/vision/mobilenet/v2_1.0_int8/mobilenet_v2_1.00_224_int8.tflite) |
| ResNet50 | 224x224 | 76.710% | 76.420% | 77.200% | [config](https://github.com/tensorflow/models/blob/master/official/projects/qat/vision/configs/experiments/image_classification/imagenet_resnet50_qat_gpu.yaml) | [TFLite(Int8/QAT)](https://storage.googleapis.com/tf_model_garden/vision/resnet50_imagenet/resnet_50_224_int8.tflite) |
| MobileNetV3.5 MultiAVG | 224x224 | 75.212% | 74.122% | 75.130% | [config](https://github.com/tensorflow/models/blob/master/official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv3.5_qat_gpu.yaml) | [TFLite(Int8/QAT)](https://storage.googleapis.com/tf_model_garden/vision/mobilenet/v3.5multiavg_1.0_int8/mobilenet_v3.5multiavg_1.00_224_int8.tflite) |
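For context, a minimal sketch (not from the README) of the Model Optimization Toolkit API it links to, applied to a small stand-in Keras model rather than one of the project's backbones.

```python
import tensorflow as tf
import tensorflow_model_optimization as tfmot

base_model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(8,)),
    tf.keras.layers.Dense(2),
])

# Wraps layers with fake-quantization ops for quantization aware training.
qat_model = tfmot.quantization.keras.quantize_model(base_model)
qat_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
```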
official/projects/qat/vision/configs/__init__.py (new file, +19)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Configs package definition."""

from official.projects.qat.vision.configs import image_classification
from official.projects.qat.vision.configs import semantic_segmentation
official/projects/qat/vision/configs/common.py (new file, +38)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Image classification configuration definition."""
import dataclasses
from typing import Optional

from official.modeling import hyperparams


@dataclasses.dataclass
class Quantization(hyperparams.Config):
  """Quantization parameters.

  Attributes:
    pretrained_original_checkpoint: A string indicate pretrained checkpoint
      location.
    change_num_bits: A `bool` indicates whether to manually allocate num_bits.
    num_bits_weight: An `int` number of bits for weight. Default to 8.
    num_bits_activation: An `int` number of bits for activation. Default to 8.
  """
  pretrained_original_checkpoint: Optional[str] = None
  change_num_bits: bool = False
  num_bits_weight: int = 8
  num_bits_activation: int = 8
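A minimal sketch (not part of this commit) of constructing this config with overridden fields, assuming the usual keyword-style construction of hyperparams.Config dataclasses:

```python
from official.projects.qat.vision.configs import common

# Hypothetical override: 4-bit weights, 8-bit activations.
quantization = common.Quantization(
    change_num_bits=True,
    num_bits_weight=4,
    num_bits_activation=8)
```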
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu.yaml (new file, +53)

runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
  loss_scale: 'dynamic'
task:
  model:
    num_classes: 1001
    input_size: [224, 224, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetV2'
        filter_size_scale: 1.0
    dropout_rate: 0.1
  losses:
    l2_weight_decay: 0.0000001
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 512  # 64 * 8
    dtype: 'float32'
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 512  # 64 * 8
    dtype: 'float32'
    drop_remainder: false
  quantization:
    pretrained_original_checkpoint: 'gs://**/mobilenetv2_gpu/22984194/ckpt-625500'
trainer:
  # With below setting, the accuracy of QAT reaches to accuracy 0.7279 after 43 hours with 8 GPUS.
  train_steps: 250200
  validation_steps: 98
  validation_interval: 2502
  steps_per_loop: 2502
  summary_interval: 2502
  checkpoint_interval: 2502
  optimizer_config:
    learning_rate:
      type: 'exponential'
      exponential:
        decay_rate: 0.9
        decay_steps: 1251
        initial_learning_rate: 0.0001
        name: 'ExponentialDecay'
        offset: 0
        staircase: true
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 0
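A quick check (not part of the config) of where these step counts come from, using the ImageNet example counts quoted in the sibling batch-256/512 configs:

```python
steps_per_epoch = 1281167 // 512     # 2502, matches steps_per_loop above
validation_steps = -(-50000 // 512)  # 98 (ceil, since drop_remainder is false)
epochs = 250200 / steps_per_epoch    # 100 epochs of QAT fine-tuning
```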
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu_batch256.yaml (new file, +53)

runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
  loss_scale: 'dynamic'
task:
  model:
    num_classes: 1001
    input_size: [224, 224, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetV2'
        filter_size_scale: 1.0
    dropout_rate: 0.0  # changed from 0.2 to 0.0
  losses:
    l2_weight_decay: 0.0000001
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 256
    dtype: 'float32'
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 256
    dtype: 'float32'
    drop_remainder: false
  quantization:
    pretrained_original_checkpoint: 'gs://**/mobilenetv2_gpu/22984194/ckpt-625500'
trainer:
  # With below setting, the accuracy of QAT reaches Top1-accuracy 0.7251 at 420336 steps after
  # 1 day 19 hours of training with 8GPUs, which is higher than the result of PTQ in MobileNetV2.
  train_steps: 1000800  # 200 epochs
  validation_steps: 196  # NUM_EXAMPLES (50000) // global_batch_size (256)
  validation_interval: 5004  # 1 epoch
  steps_per_loop: 5004  # NUM_EXAMPLES (1281167) // global_batch_size (256)
  summary_interval: 5004  # 1 epoch
  checkpoint_interval: 5004  # 1 epoch
  max_to_keep: 200
  optimizer_config:
    learning_rate:
      type: 'exponential'
      exponential:
        initial_learning_rate: 0.0001
        decay_steps: 1251  # steps_per_epoch // 4
        decay_rate: 0.96
        staircase: true
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 0
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv2_qat_gpu_batch512.yaml (new file, +53)

runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
  loss_scale: 'dynamic'
task:
  model:
    num_classes: 1001
    input_size: [224, 224, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetV2'
        filter_size_scale: 1.0
    dropout_rate: 0.0  # changed from 0.2 to 0.0
  losses:
    l2_weight_decay: 0.0000001
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 512
    dtype: 'float32'
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 512
    dtype: 'float32'
    drop_remainder: false
  quantization:
    pretrained_original_checkpoint: 'gs://**/mobilenetv2_gpu/22984194/ckpt-625500'
trainer:
  # With below setting, the accuracy of QAT reaches Top1-accuracy 0.7266 at 312750 steps after
  # 1 day 22 hours of training with 8GPUs, which is higher than the result of PTQ in MobileNetV2.
  train_steps: 500400  # 200 epochs
  validation_steps: 98  # NUM_EXAMPLES (50000) // global_batch_size (512)
  validation_interval: 2502  # 1 epoch
  steps_per_loop: 2502  # NUM_EXAMPLES (1281167) // global_batch_size (512)
  summary_interval: 2502  # 1 epoch
  checkpoint_interval: 2502  # 1 epoch
  max_to_keep: 200
  optimizer_config:
    learning_rate:
      type: 'exponential'
      exponential:
        initial_learning_rate: 0.0002
        decay_steps: 1251  # steps_per_epoch // 2
        decay_rate: 0.96
        staircase: true
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 0
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv3.5_qat_gpu.yaml (new file, +53)

runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
  loss_scale: 'dynamic'
task:
  model:
    num_classes: 1001
    input_size: [224, 224, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetMultiAVG'
        filter_size_scale: 1.0
    dropout_rate: 0.3
  losses:
    l2_weight_decay: 0.000001
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 512
    dtype: 'float32'
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 512
    dtype: 'float32'
    drop_remainder: false
  quantization:
    pretrained_original_checkpoint: 'gs://**/tf2_mhave_nobias_bn_aug05/28334857/ckpt-156000'
trainer:
  # With below setting, the accuracy of QAT reaches to accuracy 0.7513 after 30 hours with 8 GPUS.
  train_steps: 250200
  validation_steps: 98
  validation_interval: 2502
  steps_per_loop: 2502
  summary_interval: 2502
  checkpoint_interval: 2502
  optimizer_config:
    learning_rate:
      type: 'exponential'
      exponential:
        decay_rate: 0.9
        decay_steps: 1251
        initial_learning_rate: 0.0004
        name: 'ExponentialDecay'
        offset: 0
        staircase: true
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 0
official/projects/qat/vision/configs/experiments/image_classification/imagenet_mobilenetv3large_qat_tpu.yaml (new file, +69)

runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 1001
    input_size: [224, 224, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetV3Large'
        filter_size_scale: 1.0
    dropout_rate: 0.3
  losses:
    l2_weight_decay: 1.0e-06  # 1/10 of original value.
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 4096
    dtype: 'float32'
    aug_rand_hflip: true
    aug_type:
      autoaug:
        augmentation_name: v0
        cutout_const: 100
        translate_const: 250
      type: autoaug
    drop_remainder: true
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 4096
    dtype: 'float32'
    drop_remainder: false
    aug_rand_hflip: true
  quantization:
    pretrained_original_checkpoint: 'gs://**/mobilenetv3_baseline_31/ckpt-156000'
trainer:
  # With below setting, the accuracy of QAT reaches to accuracy 0.74.43 after ~2 hours with 4x4 DF.
  train_steps: 62400
  validation_steps: 13
  validation_interval: 312
  steps_per_loop: 312
  summary_interval: 312
  checkpoint_interval: 312
  optimizer_config:
    learning_rate:
      cosine:
        alpha: 0.0
        decay_steps: 62400
        initial_learning_rate: 0.0003  # 1/10 of original lr.
        name: CosineDecay
        offset: 0
      type: cosine
    optimizer:
      adamw:
        amsgrad: false
        beta_1: 0.9
        beta_2: 0.999
        epsilon: 1.0e-07
        gradient_clip_norm: 1.0
        weight_decay_rate: 0.0
      type: adamw
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 0