Commit 9070109e authored by Shixin Luo
Browse files

Change the default number of GPUs and TPUs for the experiment config template

parent bf4c3894
...@@ -19,21 +19,21 @@ task: ...@@ -19,21 +19,21 @@ task:
train_data: train_data:
input_path: 'imagenet-2012-tfrecord/train*' input_path: 'imagenet-2012-tfrecord/train*'
is_training: True is_training: True
global_batch_size: 96 global_batch_size: 768 # 96 * 8
dtype: 'float32' dtype: 'float32'
validation_data: validation_data:
input_path: 'imagenet-2012-tfrecord/valid*' input_path: 'imagenet-2012-tfrecord/valid*'
is_training: False is_training: False
global_batch_size: 96 global_batch_size: 768 # 96 * 8
dtype: 'float32' dtype: 'float32'
drop_remainder: False drop_remainder: False
trainer: trainer:
train_steps: 1201050 # 90 * steps_per_epoch train_steps: 150120 # 90 * steps_per_epoch
validation_steps: 520 validation_steps: 65
validation_interval: 13345 validation_interval: 1668
steps_per_loop: 13345 # NUM_EXAMPLES (1281167) // global_batch_size steps_per_loop: 1668 # NUM_EXAMPLES (1281167) // global_batch_size
summary_interval: 13345 summary_interval: 1668
checkpoint_interval: 13345 checkpoint_interval: 1668
optimizer_config: optimizer_config:
optimizer: optimizer:
type: 'rmsprop' type: 'rmsprop'
...@@ -44,13 +44,13 @@ trainer: ...@@ -44,13 +44,13 @@ trainer:
learning_rate: learning_rate:
type: 'exponential' type: 'exponential'
exponential: exponential:
initial_learning_rate: 0.045, initial_learning_rate: 0.36, # 0.045 * NUM_GPUS
decay_steps: 33362, # 2.5 * steps_per_epoch decay_steps: 4170, # 2.5 * steps_per_epoch
decay_rate: 0.97, decay_rate: 0.97,
staircase: True staircase: True
warmup: warmup:
type: 'linear' type: 'linear'
linear: linear:
warmup_steps: 66725 # 5 * steps_per_epoch warmup_steps: 8340 # 5 * steps_per_epoch
ema: ema:
average_decay: 0.9999 average_decay: 0.9999
\ No newline at end of file
...@@ -218,8 +218,8 @@ def image_classification_imagenet_revnet() -> cfg.ExperimentConfig: ...@@ -218,8 +218,8 @@ def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
@exp_factory.register_config_factory('mobilenet_imagenet') @exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig: def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
"""Image classification on imagenet with mobilenet.""" """Image classification on imagenet with mobilenet."""
train_batch_size = 192 train_batch_size = 1536 # 96 * 16
eval_batch_size = 192 eval_batch_size = 1536 # 96 * 16
steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
config = cfg.ExperimentConfig( config = cfg.ExperimentConfig(
task=ImageClassificationTask( task=ImageClassificationTask(
...@@ -261,9 +261,12 @@ def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig: ...@@ -261,9 +261,12 @@ def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
'learning_rate': { 'learning_rate': {
'type': 'exponential', 'type': 'exponential',
'exponential': { 'exponential': {
'initial_learning_rate': 0.045, # 0.045 * NUM_GPUS
'decay_steps': int(2.4 * steps_per_epoch), 'initial_learning_rate': 0.045 * (train_batch_size // 96),
'decay_rate': 0.97, # (2.5 / NUM_GPUS) epochs
'decay_steps': int((2.5 / (train_batch_size // 96))
* steps_per_epoch),
'decay_rate': 0.98,
'staircase': True 'staircase': True
} }
}, },
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment