Internal change

PiperOrigin-RevId: 377425143

Internal change
PiperOrigin-RevId: 377425143
1fed7f94 · A. Unique TensorFlower · 1ba20e04 · 1fed7f94 · 1fed7f94 · 1fed7f94
Commit 1fed7f94 authored Jun 03, 2021 by A. Unique TensorFlower
5 changed files
--- a/official/vision/beta/projects/simclr/configs/experiments/cifar_simclr_pretrain.yaml
+++ b/official/vision/beta/projects/simclr/configs/experiments/cifar_simclr_pretrain.yaml
@@ -72,7 +72,7 @@ trainer:
      type: 'cosine'
      cosine:
        initial_learning_rate: 0.6  #  0.3 × BatchSize / 256
-        decay_steps: 43200  # train_steps - warmup_steps
+        decay_steps: 48000
    warmup:
      type: 'linear'
      linear:

--- a/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_gpu.yaml
+++ b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_gpu.yaml
-# ImageNet classification.
+# SimCLR Imagenet 10% finetuning.
 runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float16'
@@ -55,7 +55,7 @@ trainer:
  train_steps: 12500  # 100 epochs
  validation_steps: 49  # NUM_EXAMPLES (50000) // global_batch_size
  validation_interval: 125
-  steps_per_loop: 125  # NUM_EXAMPLES (1281167) // global_batch_size
+  steps_per_loop: 125  # NUM_EXAMPLES (128116) // global_batch_size
  summary_interval: 125
  checkpoint_interval: 125
  optimizer_config:

--- a/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_tpu.yaml
+++ b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_tpu.yaml
+# SimCLR ImageNet 10% fine-tuning on TPU (ResNet-50 backbone, bfloat16).
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    mode: 'finetune'
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 50
+    backbone_trainable: true
+    projection_head:
+      proj_output_dim: 128
+      num_proj_layers: 3
+      ft_proj_idx: 1
+    supervised_head:
+      num_classes: 1001
+      zero_init: true
+    norm_activation:
+      use_sync_bn: false
+      norm_momentum: 0.9
+      norm_epsilon: 0.00001
+  loss:
+    label_smoothing: 0.0
+    one_hot: true
+  evaluation:
+    top_k: 5
+    one_hot: true
+  init_checkpoint: gs://tf_model_garden/vision/simclr/r50_1x  # presumably the pretrained ResNet-50 (1x) SimCLR checkpoint; verify
+  init_checkpoint_modules: 'backbone_projection'
+  train_data:
+    tfds_name: 'imagenet2012_subset/10pct'
+    tfds_split: 'train'
+    input_path: ''
+    is_training: true
+    global_batch_size: 1024
+    dtype: 'bfloat16'
+    parser:
+      mode: 'finetune'
+  validation_data:
+    tfds_name: 'imagenet2012_subset/10pct'
+    tfds_split: 'validation'
+    input_path: ''
+    is_training: false
+    global_batch_size: 1024
+    dtype: 'bfloat16'
+    drop_remainder: false
+    parser:
+      mode: 'finetune'
+trainer:
+  train_steps: 12500  # 100 epochs × 125 steps per epoch
+  validation_steps: 49  # ceil(NUM_EXAMPLES (50000) / global_batch_size (1024))
+  validation_interval: 125
+  steps_per_loop: 125  # NUM_EXAMPLES (128116) // global_batch_size (1024)
+  summary_interval: 125
+  checkpoint_interval: 125
+  optimizer_config:
+    optimizer:
+      type: 'lars'
+      lars:
+        momentum: 0.9
+        weight_decay_rate: 0.0
+        exclude_from_weight_decay: ['batch_normalization', 'bias']
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 0.04  # NOTE(review): 0.01 × BatchSize / 512 gives 0.02 at batch 1024; 0.04 matches 0.01 × BatchSize / 256 — confirm
+        decay_steps: 12500  # train_steps
--- a/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_gpu.yaml
+++ b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_gpu.yaml
-# ImageNet classification.
+# SimCLR Imagenet pretraining.
 runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float16'
@@ -49,12 +49,12 @@ task:
    decoder:
      decode_label: true
 trainer:
-  train_steps: 187200  # 300 epochs
+  train_steps: 500000  # 800 epochs
  validation_steps: 24  # NUM_EXAMPLES (50000) // global_batch_size
-  validation_interval: 624
-  steps_per_loop: 624  # NUM_EXAMPLES (1281167) // global_batch_size
-  summary_interval: 624
-  checkpoint_interval: 624
+  validation_interval: 625
+  steps_per_loop: 625  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 625
+  checkpoint_interval: 625
  optimizer_config:
    optimizer:
      type: 'lars'
@@ -66,8 +66,8 @@ trainer:
      type: 'cosine'
      cosine:
        initial_learning_rate: 1.6  #  0.2 * BatchSize / 256
-        decay_steps: 177840  # train_steps - warmup_steps
+        decay_steps: 500000  # train_steps
    warmup:
      type: 'linear'
      linear:
-        warmup_steps: 9360  # 5% of total epochs
+        warmup_steps: 25000  # 5% of total epochs
--- a/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_tpu.yaml
+++ b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_tpu.yaml
+# SimCLR ImageNet pretraining on TPU (ResNet-50 backbone, bfloat16).
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    mode: 'pretrain'
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 50
+    backbone_trainable: true
+    projection_head:
+      proj_output_dim: 128
+      num_proj_layers: 3
+      ft_proj_idx: 0
+    supervised_head:
+      num_classes: 1001
+    norm_activation:
+      use_sync_bn: true
+      norm_momentum: 0.9
+      norm_epsilon: 0.00001
+  loss:
+    projection_norm: true
+    temperature: 0.1
+  evaluation:
+    top_k: 5
+    one_hot: true
+  train_data:
+    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'  # NOTE(review): looks environment-specific; presumably must be overridden elsewhere — confirm
+    is_training: true
+    global_batch_size: 2048
+    dtype: 'bfloat16'
+    parser:
+      mode: 'pretrain'
+    decoder:
+      decode_label: true
+  validation_data:
+    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'  # NOTE(review): looks environment-specific; presumably must be overridden elsewhere — confirm
+    is_training: false
+    global_batch_size: 2048
+    dtype: 'bfloat16'
+    drop_remainder: false
+    parser:
+      mode: 'pretrain'
+    decoder:
+      decode_label: true
+trainer:
+  train_steps: 500000  # 800 epochs × 625 steps per epoch
+  validation_steps: 24  # NUM_EXAMPLES (50000) // global_batch_size (2048); NOTE(review): 848 examples fall outside 24 full batches — confirm intent
+  validation_interval: 625
+  steps_per_loop: 625  # NUM_EXAMPLES (1281167) // global_batch_size (2048)
+  summary_interval: 625
+  checkpoint_interval: 625
+  optimizer_config:
+    optimizer:
+      type: 'lars'
+      lars:
+        momentum: 0.9
+        weight_decay_rate: 0.000001
+        exclude_from_weight_decay: ['batch_normalization', 'bias']
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6  #  0.2 * BatchSize (2048) / 256
+        decay_steps: 500000  # train_steps
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 25000  # 5% of train_steps (40 of 800 epochs)