Commit a5bbb547 authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 457023539
parent 7e4488ae
# MobileNetV3-large_1.0 ImageNet classification: 74.96% top-1. # MobileNetV3-large_1.0 ImageNet classification: ~75.3% top-1.
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16' mixed_precision_dtype: 'bfloat16'
...@@ -11,9 +11,13 @@ task: ...@@ -11,9 +11,13 @@ task:
mobilenet: mobilenet:
model_id: 'MobileNetV3Large' model_id: 'MobileNetV3Large'
filter_size_scale: 1.0 filter_size_scale: 1.0
kernel_initializer: 'random_uniform'
norm_activation:
norm_epsilon: 0.001
norm_momentum: 0.997
dropout_rate: 0.2 dropout_rate: 0.2
losses: losses:
l2_weight_decay: 0.00001 l2_weight_decay: 0.0
one_hot: true one_hot: true
label_smoothing: 0.1 label_smoothing: 0.1
train_data: train_data:
...@@ -21,16 +25,18 @@ task: ...@@ -21,16 +25,18 @@ task:
is_training: true is_training: true
global_batch_size: 4096 global_batch_size: 4096
dtype: 'bfloat16' dtype: 'bfloat16'
# Enables Inception-style pre-processing. aug_type:
decode_jpeg_only: false autoaug:
augmentation_name: v0
cutout_const: 100
translate_const: 250
type: autoaug
validation_data: validation_data:
input_path: 'imagenet-2012-tfrecord/valid*' input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false is_training: false
global_batch_size: 4096 global_batch_size: 4096
dtype: 'bfloat16' dtype: 'bfloat16'
drop_remainder: false drop_remainder: false
# Enables Inception-style pre-processing.
decode_jpeg_only: false
trainer: trainer:
train_steps: 156000 # 500 epochs train_steps: 156000 # 500 epochs
validation_steps: 13 validation_steps: 13
...@@ -40,14 +46,25 @@ trainer: ...@@ -40,14 +46,25 @@ trainer:
checkpoint_interval: 312 checkpoint_interval: 312
optimizer_config: optimizer_config:
learning_rate: learning_rate:
type: 'cosine'
cosine: cosine:
alpha: 0.0 alpha: 0.0
decay_steps: 156000 decay_steps: 156000
initial_learning_rate: 0.5 initial_learning_rate: 0.004
name: CosineDecay name: CosineDecay
offset: 0 offset: 0
warmup: type: 'cosine'
type: 'linear' optimizer:
linear: adamw:
warmup_steps: 5000 amsgrad: false
beta_1: 0.9
beta_2: 0.999
clipnorm: null
clipvalue: null
epsilon: 1.0e-07
exclude_from_weight_decay: ['batch_normalization']
global_clipnorm: null
gradient_clip_norm: 0.0
include_in_weight_decay: null
name: 'AdamWeightDecay'
weight_decay_rate: 0.1
type: 'adamw'
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment