Unverified Commit 0225b135 authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

parents 7479dbb8 4c571a3c
# --experiment_type=retinanet_mobile_coco
# COCO AP 27.0%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
mobilenet:
model_id: 'MobileDetCPU'
filter_size_scale: 1.0
type: 'mobiledet'
decoder:
type: 'fpn'
fpn:
num_filters: 128
use_separable_conv: true
head:
num_convs: 4
num_filters: 128
use_separable_conv: true
input_size: [320, 320, 3]
max_level: 6
min_level: 3
norm_activation:
activation: 'relu6'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
# --experiment_type=retinanet_mobile_coco
# COCO AP 23.5%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
mobilenet:
model_id: 'MobileNetV2'
filter_size_scale: 1.0
type: 'mobilenet'
decoder:
type: 'fpn'
fpn:
num_filters: 128
use_separable_conv: true
head:
num_convs: 4
num_filters: 128
use_separable_conv: true
input_size: [256, 256, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'relu6'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
# SpineNet-143 COCO detection with protocol C config. Expecting 50.0% AP.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 4.0e-05
model:
anchor:
anchor_size: 4
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet:
stochastic_depth_drop_rate: 0.2
model_id: '143'
type: 'spinenet'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 256
input_size: [1280, 1280, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.1
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [219450, 226380]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 231000
validation_interval: 462
validation_steps: 625
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 4.0e-05
model:
anchor:
anchor_size: 4
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet:
stochastic_depth_drop_rate: 0.2
model_id: '190'
type: 'spinenet'
decoder:
type: 'identity'
head:
num_convs: 7
num_filters: 512
input_size: [1280, 1280, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.1
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [219450, 226380]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 231000
validation_interval: 462
validation_steps: 625
# --experiment_type=retinanet_mobile_coco
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet_mobile:
stochastic_depth_drop_rate: 0.2
model_id: '49'
se_ratio: 0.2
type: 'spinenet_mobile'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 48
use_separable_conv: true
input_size: [384, 384, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
# SpineNet-49 COCO detection with protocol C config. Expecting 44.2% AP.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 4.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet:
stochastic_depth_drop_rate: 0.2
model_id: '49'
type: 'spinenet'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 256
input_size: [640, 640, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.1
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [219450, 226380]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 231000
validation_interval: 462
validation_steps: 625
# --experiment_type=retinanet_mobile_coco
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet_mobile:
stochastic_depth_drop_rate: 0.2
model_id: '49S'
se_ratio: 0.2
type: 'spinenet_mobile'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 40
use_separable_conv: true
input_size: [384, 384, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
# --experiment_type=retinanet_mobile_coco
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet_mobile:
stochastic_depth_drop_rate: 0.2
model_id: '49XS'
se_ratio: 0.2
type: 'spinenet_mobile'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 24
use_separable_conv: true
input_size: [256, 256, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
# SpineNet-96 COCO detection with protocol C config. Expecting 48.5% AP.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 4.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
spinenet:
stochastic_depth_drop_rate: 0.2
model_id: '96'
type: 'spinenet'
decoder:
type: 'identity'
head:
num_convs: 4
num_filters: 256
input_size: [1024, 1024, 3]
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.1
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
checkpoint_interval: 462
optimizer_config:
learning_rate:
stepwise:
boundaries: [219450, 226380]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 231000
validation_interval: 462
validation_steps: 625
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
annotation_file: '' # Can't use annotation file when tfds is used.
losses:
l2_weight_decay: 0.0001
model:
num_classes: 91
max_level: 7
min_level: 3
input_size: [640, 640, 3]
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
tfds_name: 'coco/2017'
tfds_split: 'train'
drop_remainder: true
dtype: bfloat16
global_batch_size: 256
input_path: ''
is_training: true
shuffle_buffer_size: 1000
validation_data:
tfds_name: 'coco/2017'
tfds_split: 'validation'
drop_remainder: true
dtype: bfloat16
global_batch_size: 8
input_path: ''
is_training: false
# Benchmarks run on the same instance; change the eval batch size to fit on a 4x4 TPU.
task:
validation_data:
global_batch_size: 32
trainer:
validation_interval: 1560
validation_steps: 156
# Use your own cityscapes preprocessed dataset. 79% meanIoU.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 19
input_size: [null, null, 3]
backbone:
type: 'dilated_resnet'
dilated_resnet:
model_id: 101
output_stride: 16
stem_type: 'v1'
se_ratio: 0.25
stochastic_depth_drop_rate: 0.2
multigrid: [1, 2, 4]
last_stage_repeats: 1
decoder:
aspp:
pool_kernel_size: [512, 1024]
head:
feature_fusion: 'deeplabv3plus'
low_level: 2
low_level_num_filters: 48
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
losses:
top_k_percent_pixels: 1.0 # only backpropagate loss for the top 100% of pixels (i.e., all pixels).
train_data:
output_size: [1024, 2048]
crop_size: [512, 1024]
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'train'
is_training: true
global_batch_size: 16
dtype: 'float32'
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
output_size: [1024, 2048]
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'validation'
is_training: false
global_batch_size: 16
dtype: 'float32'
drop_remainder: false
resize_eval_groundtruth: true
trainer:
optimizer_config:
learning_rate:
polynomial:
decay_steps: 90000
initial_learning_rate: 0.01
power: 0.9
type: polynomial
optimizer:
sgd:
momentum: 0.9
type: sgd
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 925
type: linear
steps_per_loop: 185
summary_interval: 185
train_steps: 90000
validation_interval: 185
validation_steps: 31
checkpoint_interval: 185
# 3D ResNet-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 77.0% top-1, 93.0% top-5.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
dropout_rate: 0.5
norm_activation:
use_sync_bn: false
backbone:
resnet_3d:
block_specs: !!python/tuple
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
model_id: 50
stem_conv_temporal_kernel_size: 5
stem_conv_temporal_stride: 2
stem_pool_temporal_stride: 1
train_data:
name: kinetics400
feature_shape: !!python/tuple
- 32
- 224
- 224
- 3
temporal_stride: 2
global_batch_size: 1024
dtype: 'bfloat16'
shuffle_buffer_size: 1024
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
validation_data:
name: kinetics400
feature_shape: !!python/tuple
- 32
- 256
- 256
- 3
temporal_stride: 2
num_test_clips: 10
num_test_crops: 3
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
optimizer_config:
learning_rate:
cosine:
initial_learning_rate: 0.8
decay_steps: 42104
warmup:
linear:
warmup_steps: 1053
train_steps: 42104
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# 3D ResNet-RS-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 78.2% top-1 accuracy.
runtime:
mixed_precision_dtype: bfloat16
task:
losses:
l2_weight_decay: 0.00004
label_smoothing: 0.1
one_hot: true
model:
aggregate_endpoints: false
backbone:
resnet_3d_rs:
model_id: 50
stem_type: 'v1'
stem_conv_temporal_kernel_size: 5
stem_conv_temporal_stride: 2
stem_pool_temporal_stride: 1
stochastic_depth_drop_rate: 0.1
se_ratio: 0.25
type: resnet_3d_rs
dropout_rate: 0.5
model_type: video_classification
norm_activation:
activation: relu
norm_epsilon: 1.0e-05
norm_momentum: 0.0
use_sync_bn: false
train_data:
data_format: channels_last
drop_remainder: true
dtype: bfloat16
feature_shape: !!python/tuple
- 32
- 224
- 224
- 3
file_type: sstable
global_batch_size: 1024
is_training: true
min_image_size: 256
name: kinetics400
num_channels: 3
num_classes: 400
num_examples: 215570
num_test_clips: 1
num_test_crops: 1
one_hot: true
temporal_stride: 2
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
validation_data:
data_format: channels_last
drop_remainder: false
dtype: bfloat16
feature_shape: !!python/tuple
- 32
- 256
- 256
- 3
file_type: sstable
global_batch_size: 64
is_training: false
min_image_size: 256
name: kinetics400
num_channels: 3
num_classes: 400
num_examples: 17706
num_test_clips: 10
num_test_crops: 3
one_hot: true
temporal_stride: 2
trainer:
checkpoint_interval: 210
max_to_keep: 3
optimizer_config:
ema:
average_decay: 0.9999
trainable_weights_only: false
learning_rate:
cosine:
decay_steps: 73682
initial_learning_rate: 0.8
name: CosineDecay
type: cosine
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 1050
type: linear
train_steps: 73682
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# SlowOnly 16x4 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 75.6% top-1, 92.1% top-5.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
dropout_rate: 0.5
norm_activation:
use_sync_bn: false
backbone:
resnet_3d:
block_specs: !!python/tuple
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
model_id: 50
stem_conv_temporal_kernel_size: 1
stem_conv_temporal_stride: 1
stem_pool_temporal_stride: 1
train_data:
name: kinetics400
feature_shape: !!python/tuple
- 16
- 224
- 224
- 3
temporal_stride: 4
global_batch_size: 1024
dtype: 'bfloat16'
shuffle_buffer_size: 1024
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
validation_data:
name: kinetics400
feature_shape: !!python/tuple
- 16
- 256
- 256
- 3
temporal_stride: 4
num_test_clips: 10
num_test_crops: 3
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
optimizer_config:
learning_rate:
cosine:
initial_learning_rate: 0.8
decay_steps: 42104
warmup:
linear:
warmup_steps: 1053
train_steps: 42104
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# SlowOnly 8x8 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 74.1% top-1, 91.4% top-5.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
dropout_rate: 0.5
norm_activation:
use_sync_bn: false
backbone:
resnet_3d:
block_specs: !!python/tuple
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
model_id: 50
stem_conv_temporal_kernel_size: 1
stem_conv_temporal_stride: 1
stem_pool_temporal_stride: 1
train_data:
name: kinetics400
feature_shape: !!python/tuple
- 8
- 224
- 224
- 3
temporal_stride: 8
global_batch_size: 1024
dtype: 'bfloat16'
shuffle_buffer_size: 1024
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
validation_data:
name: kinetics400
feature_shape: !!python/tuple
- 8
- 256
- 256
- 3
temporal_stride: 8
num_test_clips: 10
num_test_crops: 3
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
optimizer_config:
learning_rate:
cosine:
initial_learning_rate: 0.8
decay_steps: 42104
warmup:
linear:
warmup_steps: 1053
train_steps: 42104
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# 3D ResNet-50 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 79.5% top-1, 94.8% top-5.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
dropout_rate: 0.5
norm_activation:
use_sync_bn: false
backbone:
resnet_3d:
block_specs: !!python/tuple
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
model_id: 50
stem_conv_temporal_kernel_size: 5
stem_conv_temporal_stride: 2
stem_pool_temporal_stride: 1
train_data:
name: kinetics600
feature_shape: !!python/tuple
- 32
- 224
- 224
- 3
temporal_stride: 2
global_batch_size: 1024
dtype: 'bfloat16'
shuffle_buffer_size: 1024
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
validation_data:
name: kinetics600
feature_shape: !!python/tuple
- 32
- 256
- 256
- 3
temporal_stride: 2
num_test_clips: 10
num_test_crops: 3
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
optimizer_config:
learning_rate:
cosine:
initial_learning_rate: 0.8
decay_steps: 71488
warmup:
linear:
warmup_steps: 1787
train_steps: 71488
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# 3D ResNet-50g video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 78.7% accuracy, 93.6% top-5.
# Train on TPU: v3-128, eval on TPU: v3-32
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
init_checkpoint: null
init_checkpoint_modules: all
losses:
l2_weight_decay: 0.0001
label_smoothing: 0.0
model:
aggregate_endpoints: false
backbone:
resnet_3d:
block_specs: !!python/tuple
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: true
- temporal_kernel_sizes: !!python/tuple
- 3
- 1
- 3
- 1
temporal_strides: 1
use_self_gating: true
- temporal_kernel_sizes: !!python/tuple
- 3
- 1
- 3
- 1
- 3
- 1
temporal_strides: 1
use_self_gating: true
- temporal_kernel_sizes: !!python/tuple
- 1
- 3
- 1
temporal_strides: 1
use_self_gating: true
model_id: 50
stem_conv_temporal_kernel_size: 5
stem_conv_temporal_stride: 2
stem_pool_temporal_stride: 2
stem_type: v0
stochastic_depth_drop_rate: 0.0
type: resnet_3d
dropout_rate: 0.2
model_type: video_classification
norm_activation:
activation: relu
norm_epsilon: 1.0e-05
norm_momentum: 0.9
use_sync_bn: false
train_data:
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.49
aug_min_aspect_ratio: 0.5
drop_remainder: true
dtype: 'bfloat16'
feature_shape: !!python/tuple
- 64
- 224
- 224
- 3
global_batch_size: 1024
min_image_size: 256
name: kinetics600
num_classes: 600
split: train
validation_data:
dtype: 'bfloat16'
feature_shape: !!python/tuple
- 250
- 224
- 224
- 3
global_batch_size: 64
min_image_size: 256
name: kinetics600
num_classes: 600
num_examples: 27780
num_test_clips: 1
num_test_crops: 1
one_hot: true
trainer:
optimizer_config:
learning_rate:
cosine:
alpha: 0.0
decay_steps: 71400
initial_learning_rate: 1.6
name: CosineDecay
type: cosine
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 1785
type: linear
train_steps: 71400
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# SlowOnly 8x8 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 77.3% top-1, 93.6% top-5.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
dropout_rate: 0.5
norm_activation:
use_sync_bn: false
backbone:
resnet_3d:
block_specs: !!python/tuple
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 1
- 1
- 1
- 1
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
- temporal_kernel_sizes: !!python/tuple
- 3
- 3
- 3
temporal_strides: 1
use_self_gating: false
model_id: 50
stem_conv_temporal_kernel_size: 1
stem_conv_temporal_stride: 1
stem_pool_temporal_stride: 1
train_data:
name: kinetics600
feature_shape: !!python/tuple
- 8
- 224
- 224
- 3
temporal_stride: 8
global_batch_size: 1024
dtype: 'bfloat16'
shuffle_buffer_size: 1024
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
validation_data:
name: kinetics600
feature_shape: !!python/tuple
- 8
- 256
- 256
- 3
temporal_stride: 8
num_test_clips: 10
num_test_crops: 3
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
optimizer_config:
learning_rate:
cosine:
initial_learning_rate: 0.8
decay_steps: 71488
warmup:
linear:
warmup_steps: 1787
train_steps: 71488
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Image classification configuration definition."""
import dataclasses
import os
from typing import List, Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.configs import common
from official.vision.configs import backbones
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for image classification training and evaluation data."""
  input_path: str = ''  # File pattern of the input data; '' means not set.
  global_batch_size: int = 0  # Batch size summed over all replicas.
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10  # presumably tf.data interleave parallelism — confirm in the input pipeline.
  is_multilabel: bool = False
  aug_rand_hflip: bool = True  # Random horizontal flip augmentation.
  aug_type: Optional[
      common.Augmentation] = None  # Choose from AutoAugment and RandAugment.
  color_jitter: float = 0.
  random_erasing: Optional[common.RandomErasing] = None
  file_type: str = 'tfrecord'
  image_field_key: str = 'image/encoded'  # Feature key holding the image bytes.
  label_field_key: str = 'image/class/label'  # Feature key holding the label.
  decode_jpeg_only: bool = True
  mixup_and_cutmix: Optional[common.MixupAndCutmix] = None
  decoder: Optional[common.DataDecoder] = common.DataDecoder()
  # Keep for backward compatibility.
  aug_policy: Optional[str] = None  # None, 'autoaug', or 'randaug'.
  randaug_magnitude: Optional[int] = 10  # Used with the legacy 'randaug' aug_policy.
@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
  """The model config for an image classification network."""
  num_classes: int = 0  # Number of output classes; 0 means not set.
  input_size: List[int] = dataclasses.field(default_factory=list)
  # Backbone defaults to a plain ResNet; overridden per-experiment below.
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'random_uniform'
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss config for image classification."""
  loss_weight: float = 1.0  # Scalar multiplier applied to the loss.
  one_hot: bool = True  # presumably labels are one-hot encoded when true — confirm with the task.
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0
  soft_labels: bool = False
@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation config."""
  top_k: int = 5  # k used for the top-k metric.
@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
  """The task config tying together model, data, losses and evaluation."""
  model: ImageClassificationModel = ImageClassificationModel()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  evaluation: Evaluation = Evaluation()
  init_checkpoint: Optional[str] = None  # Path to a checkpoint to warm-start from.
  init_checkpoint_modules: str = 'all'  # all or backbone
  model_output_keys: Optional[List[int]] = dataclasses.field(
      default_factory=list)
@exp_factory.register_config_factory('image_classification')
def image_classification() -> cfg.ExperimentConfig:
  """Builds the general image classification experiment config."""
  # Both splits must state is_training explicitly; enforced via restrictions.
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
  ]
  return cfg.ExperimentConfig(
      task=ImageClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=restrictions)
# ImageNet-2012 dataset constants shared by the experiment factories below.
IMAGENET_TRAIN_EXAMPLES = 1281167  # Training-split example count.
IMAGENET_VAL_EXAMPLES = 50000  # Validation-split example count.
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'  # TFRecord directory prefix.
@exp_factory.register_config_factory('resnet_imagenet')
def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on ImageNet with ResNet-50.

  Returns:
    An `ExperimentConfig` for 90-epoch ResNet-50 training with SGD momentum
    and a stepwise learning-rate schedule scaled linearly with batch size.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per pass over the training set.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              # 1001 classes — presumably 1000 ImageNet classes plus a
              # background class; confirm against the label mapping.
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  # 10x LR drops at epochs 30, 60 and 80; base LR scales
                  # linearly with batch size (0.1 per 256 examples).
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [
                          0.1 * train_batch_size / 256,
                          0.01 * train_batch_size / 256,
                          0.001 * train_batch_size / 256,
                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  # Linear warmup from 0 over the first 5 epochs.
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('resnet_rs_imagenet')
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Image classification on ImageNet with ResNet-RS-50.

  Returns:
    An `ExperimentConfig` for 350-epoch ResNet-RS training with RandAugment,
    label smoothing, EMA of weights, and a cosine learning-rate schedule.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per pass over the training set.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              # ResNet-RS trains at reduced 160x160 resolution.
              input_size=[160, 160, 3],
              backbone=backbones.Backbone(
                  type='resnet',
                  resnet=backbones.ResNet(
                      model_id=50,
                      stem_type='v1',
                      resnetd_shortcut=True,
                      replace_stem_max_pool=True,
                      se_ratio=0.25,
                      stochastic_depth_drop_rate=0.0)),
              dropout_rate=0.25,
              norm_activation=common.NormActivation(
                  norm_momentum=0.0,
                  norm_epsilon=1e-5,
                  use_sync_bn=False,
                  activation='swish')),
          losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_type=common.Augmentation(
                  type='randaug', randaug=common.RandAugment(magnitude=10))),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=350 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              # Exponential moving average of weights used for evaluation.
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  # Cosine decay spans the full 350-epoch training run.
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('revnet_imagenet')
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
  """Returns a RevNet config for image classification on ImageNet.

  Returns:
    An `ExperimentConfig` for 90-epoch RevNet-56 training with SGD momentum
    and a stepwise learning-rate schedule.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per pass over the training set.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='revnet', revnet=backbones.RevNet(model_id=56)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False),
              # Extra BatchNorm before global average pooling (see model config).
              add_head_batch_norm=True),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  # 10x LR drops at epochs 30, 60 and 80; fixed LR values
                  # (not batch-size-scaled like the resnet_imagenet config).
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [0.8, 0.08, 0.008, 0.0008]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
  """Image classification on ImageNet with MobileNetV2.

  Returns:
    An `ExperimentConfig` for 500-epoch MobileNetV2 training with RMSProp
    and an exponentially decaying learning rate.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per pass over the training set.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              dropout_rate=0.2,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', filter_size_scale=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.002,
                  }
              },
              'learning_rate': {
                  'type': 'exponential',
                  # Base LR scales linearly with batch size (0.008 per 128
                  # examples); decays 2% every 2.5 epochs, staircase-style.
                  'exponential': {
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      'decay_steps':
                          int(2.5 * steps_per_epoch),
                      'decay_rate':
                          0.98,
                      'staircase':
                          True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment