modify default hyperparameters

716b9d73 · Shixin Luo · 61961346 · 716b9d73 · 716b9d73
Commit 716b9d73 authored Sep 29, 2020 by Shixin Luo
2 changed files
--- a/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_1.0_gpu.yaml
+++ b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_1.0_gpu.yaml
@@ -13,7 +13,7 @@ task:
        width_multiplier: 1.0
    dropout_rate: 0.2
  losses:
-    l2_weight_decay: 0.0001
+    l2_weight_decay: 0.00002
    one_hot: True
    label_smoothing: 0.1
  train_data:
@@ -28,7 +28,7 @@ task:
    dtype: 'float32'
    drop_remainder: False
 trainer:
-  train_steps: 150120  # 90 * steps_per_epoch
+  train_steps: 700000  # 700K
  validation_steps: 65
  validation_interval: 1668
  steps_per_loop: 1668  # NUM_EXAMPLES (1281167) // global_batch_size
@@ -46,7 +46,7 @@ trainer:
      exponential:
        initial_learning_rate: 0.36  # 0.045 * NUM_GPUS
        decay_steps: 4170  # 2.5 * steps_per_epoch
-        decay_rate: 0.97
+        decay_rate: 0.98
        staircase: True
    warmup:
      type: 'linear'

--- a/official/vision/beta/configs/image_classification.py
+++ b/official/vision/beta/configs/image_classification.py
@@ -233,7 +233,7 @@ def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
                      model_id='MobileNetV2', width_multiplier=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9997, norm_epsilon=1e-3)),
-          losses=Losses(l2_weight_decay=1e-4, label_smoothing=0.1),
+          losses=Losses(l2_weight_decay=2e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
@@ -264,8 +264,7 @@ def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
                      # 0.045 * NUM_GPUS
                      'initial_learning_rate': 0.045 * (train_batch_size // 96),
                      # 2.5 epochs
-                      'decay_steps': int((2.5 / (train_batch_size // 96))
+                      'decay_steps': int(2.5 * steps_per_epoch),
-                                         * steps_per_epoch),
                      'decay_rate': 0.98,
                      'staircase': True
                  }