"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "b7ba040e382c9d964a481f374719791852e535a1"
Commit 9070109e authored by Shixin Luo
Browse files

Change the default number of GPUs and TPUs for the experiment config template

parent bf4c3894
...@@ -19,21 +19,21 @@ task: ...@@ -19,21 +19,21 @@ task:
train_data: train_data:
input_path: 'imagenet-2012-tfrecord/train*' input_path: 'imagenet-2012-tfrecord/train*'
is_training: True is_training: True
global_batch_size: 96 global_batch_size: 768 # 96 * 8
dtype: 'float32' dtype: 'float32'
validation_data: validation_data:
input_path: 'imagenet-2012-tfrecord/valid*' input_path: 'imagenet-2012-tfrecord/valid*'
is_training: False is_training: False
global_batch_size: 96 global_batch_size: 768 # 96 * 8
dtype: 'float32' dtype: 'float32'
drop_remainder: False drop_remainder: False
trainer: trainer:
train_steps: 1201050 # 90 * steps_per_epoch train_steps: 150120 # 90 * steps_per_epoch
validation_steps: 520 validation_steps: 65
validation_interval: 13345 validation_interval: 1668
steps_per_loop: 13345 # NUM_EXAMPLES (1281167) // global_batch_size steps_per_loop: 1668 # NUM_EXAMPLES (1281167) // global_batch_size
summary_interval: 13345 summary_interval: 1668
checkpoint_interval: 13345 checkpoint_interval: 1668
optimizer_config: optimizer_config:
optimizer: optimizer:
type: 'rmsprop' type: 'rmsprop'
...@@ -44,13 +44,13 @@ trainer: ...@@ -44,13 +44,13 @@ trainer:
learning_rate: learning_rate:
type: 'exponential' type: 'exponential'
exponential: exponential:
initial_learning_rate: 0.045, initial_learning_rate: 0.36, # 0.045 * NUM_GPUS
decay_steps: 33362, # 2.5 * steps_per_epoch decay_steps: 4170, # 2.5 * steps_per_epoch
decay_rate: 0.97, decay_rate: 0.97,
staircase: True staircase: True
warmup: warmup:
type: 'linear' type: 'linear'
linear: linear:
warmup_steps: 66725 # 5 * steps_per_epoch warmup_steps: 8340 # 5 * steps_per_epoch
ema: ema:
average_decay: 0.9999 average_decay: 0.9999
\ No newline at end of file
...@@ -218,8 +218,8 @@ def image_classification_imagenet_revnet() -> cfg.ExperimentConfig: ...@@ -218,8 +218,8 @@ def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
@exp_factory.register_config_factory('mobilenet_imagenet') @exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig: def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
"""Image classification on imagenet with mobilenet.""" """Image classification on imagenet with mobilenet."""
train_batch_size = 192 train_batch_size = 1536 # 96 * 16
eval_batch_size = 192 eval_batch_size = 1536 # 96 * 16
steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
config = cfg.ExperimentConfig( config = cfg.ExperimentConfig(
task=ImageClassificationTask( task=ImageClassificationTask(
...@@ -261,9 +261,12 @@ def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig: ...@@ -261,9 +261,12 @@ def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
'learning_rate': { 'learning_rate': {
'type': 'exponential', 'type': 'exponential',
'exponential': { 'exponential': {
'initial_learning_rate': 0.045, # 0.045 * NUM_GPUS
'decay_steps': int(2.4 * steps_per_epoch), 'initial_learning_rate': 0.045 * (train_batch_size // 96),
'decay_rate': 0.97, # (2.5 / NUM_GPUS) epochs
'decay_steps': int((2.5 / (train_batch_size // 96))
* steps_per_epoch),
'decay_rate': 0.98,
'staircase': True 'staircase': True
} }
}, },
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment