Internal change

PiperOrigin-RevId: 431756117

Internal change
PiperOrigin-RevId: 431756117
c44482ab · A. Unique TensorFlower · 10ee28dd · c44482ab · c44482ab · c44482ab
Commit c44482ab authored Mar 01, 2022 by A. Unique TensorFlower
20 changed files
--- a/official/vision/configs/__init__.py
+++ b/official/vision/configs/__init__.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Configs package definition."""
+
+from official.vision.configs import image_classification
+from official.vision.configs import maskrcnn
+from official.vision.configs import retinanet
+from official.vision.configs import semantic_segmentation
+from official.vision.configs import video_classification
--- a/official/vision/configs/backbones.py
+++ b/official/vision/configs/backbones.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Backbones configurations."""
+import dataclasses
+from typing import Optional, List
+
+# Import libraries
+
+from official.modeling import hyperparams
+
+
+@dataclasses.dataclass
+class ResNet(hyperparams.Config):
+  """ResNet backbone config.
+
+  Every field has a default, so `ResNet()` is a complete config on its own.
+  """
+  # Model variant; the ImageNet experiment YAMLs set this to 50, 101 or 152.
+  model_id: int = 50
+  depth_multiplier: float = 1.0
+  stem_type: str = 'v0'
+  # NOTE(review): presumably the squeeze-and-excitation ratio, with 0.0
+  # meaning "disabled" -- confirm against the backbone builder.
+  se_ratio: float = 0.0
+  stochastic_depth_drop_rate: float = 0.0
+  scale_stem: bool = True
+  resnetd_shortcut: bool = False
+  replace_stem_max_pool: bool = False
+  bn_trainable: bool = True
+
+
+@dataclasses.dataclass
+class DilatedResNet(hyperparams.Config):
+  """DilatedResNet backbone config.
+
+  Every field has a default, so `DilatedResNet()` is a complete config.
+  """
+  model_id: int = 50
+  output_stride: int = 16
+  # Optional list of ints; None by default. The ResNet-101 DeepLab experiment
+  # YAML sets it to [1, 2, 4].
+  multigrid: Optional[List[int]] = None
+  stem_type: str = 'v0'
+  last_stage_repeats: int = 1
+  se_ratio: float = 0.0
+  stochastic_depth_drop_rate: float = 0.0
+
+
+@dataclasses.dataclass
+class EfficientNet(hyperparams.Config):
+  """EfficientNet backbone config."""
+  # Variant name given as a string; defaults to 'b0'.
+  model_id: str = 'b0'
+  se_ratio: float = 0.0
+  stochastic_depth_drop_rate: float = 0.0
+
+
+@dataclasses.dataclass
+class MobileNet(hyperparams.Config):
+  """MobileNet backbone config."""
+  # Variant name; the experiment YAMLs use 'MobileNetV2', 'MobileNetV3Large'
+  # and 'MobileNetV3Small'.
+  model_id: str = 'MobileNetV2'
+  filter_size_scale: float = 1.0
+  stochastic_depth_drop_rate: float = 0.0
+  # None by default, i.e. no output stride is requested through this config.
+  output_stride: Optional[int] = None
+  output_intermediate_endpoints: bool = False
+
+
+@dataclasses.dataclass
+class SpineNet(hyperparams.Config):
+  """SpineNet backbone config."""
+  # Variant identifier, stored as a string rather than an int.
+  model_id: str = '49'
+  stochastic_depth_drop_rate: float = 0.0
+  # NOTE(review): presumably the lowest/highest feature pyramid levels the
+  # backbone outputs -- confirm against the SpineNet builder.
+  min_level: int = 3
+  max_level: int = 7
+
+
+@dataclasses.dataclass
+class SpineNetMobile(hyperparams.Config):
+  """Mobile SpineNet backbone config."""
+  # Variant identifier, stored as a string rather than an int.
+  model_id: str = '49'
+  stochastic_depth_drop_rate: float = 0.0
+  se_ratio: float = 0.2
+  expand_ratio: int = 6
+  min_level: int = 3
+  max_level: int = 7
+  # If use_keras_upsampling_2d is True, the model uses the UpSampling2D Keras
+  # layer instead of the optimized custom TF op, which makes the model more
+  # Keras-style. Set this flag to True when applying QAT from the model
+  # optimization toolkit, which requires the model to use Keras layers.
+  use_keras_upsampling_2d: bool = False
+
+
+@dataclasses.dataclass
+class RevNet(hyperparams.Config):
+  """RevNet backbone config."""
+  # Specifies the depth of RevNet; defaults to 56.
+  model_id: int = 56
+
+
+@dataclasses.dataclass
+class MobileDet(hyperparams.Config):
+  """MobileDet backbone config."""
+  # Variant name; defaults to the 'MobileDetCPU' variant.
+  model_id: str = 'MobileDetCPU'
+  filter_size_scale: float = 1.0
+
+
+@dataclasses.dataclass
+class Backbone(hyperparams.OneOfConfig):
+  """Configuration for backbones.
+
+  Attributes:
+    type: 'str', type of backbone be used, one of the fields below.
+    resnet: resnet backbone config.
+    dilated_resnet: dilated resnet backbone for semantic segmentation config.
+    revnet: revnet backbone config.
+    efficientnet: efficientnet backbone config.
+    spinenet: spinenet backbone config.
+    spinenet_mobile: mobile spinenet backbone config.
+    mobilenet: mobilenet backbone config.
+    mobiledet: mobiledet backbone config.
+  """
+  type: Optional[str] = None
+  resnet: ResNet = ResNet()
+  dilated_resnet: DilatedResNet = DilatedResNet()
+  revnet: RevNet = RevNet()
+  efficientnet: EfficientNet = EfficientNet()
+  spinenet: SpineNet = SpineNet()
+  spinenet_mobile: SpineNetMobile = SpineNetMobile()
+  mobilenet: MobileNet = MobileNet()
+  mobiledet: MobileDet = MobileDet()
+
--- a/official/vision/configs/backbones_3d.py
+++ b/official/vision/configs/backbones_3d.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""3D Backbones configurations."""
+from typing import Optional, Tuple
+
+# Import libraries
+import dataclasses
+
+from official.modeling import hyperparams
+
+
+@dataclasses.dataclass
+class ResNet3DBlock(hyperparams.Config):
+  """Configuration of a ResNet 3D block.
+
+  Instances are collected in `ResNet3D.block_specs`.
+  """
+  temporal_strides: int = 1
+  # Tuple of ints of any length; empty by default.
+  temporal_kernel_sizes: Tuple[int, ...] = ()
+  use_self_gating: bool = False
+
+
+@dataclasses.dataclass
+class ResNet3D(hyperparams.Config):
+  """ResNet 3D backbone config.
+
+  Base class of the `ResNet3D50` and `ResNet3DRS` presets, which fill in
+  `block_specs`.
+  """
+  model_id: int = 50
+  stem_type: str = 'v0'
+  stem_conv_temporal_kernel_size: int = 5
+  stem_conv_temporal_stride: int = 2
+  stem_pool_temporal_stride: int = 2
+  # Empty here; the preset subclasses provide four block specs each.
+  block_specs: Tuple[ResNet3DBlock, ...] = ()
+  stochastic_depth_drop_rate: float = 0.0
+  se_ratio: float = 0.0
+
+
+@dataclasses.dataclass
+class ResNet3D50(ResNet3D):
+  """Block specifications of the Resnet50 (3D) model.
+
+  Overrides `block_specs` with four blocks; all other fields keep the
+  `ResNet3D` defaults.
+  """
+  model_id: int = 50
+  # Four block specs, all with temporal stride 1 and self-gating enabled; the
+  # blocks differ only in their temporal kernel size tuples.
+  block_specs: Tuple[
+      ResNet3DBlock, ResNet3DBlock, ResNet3DBlock, ResNet3DBlock] = (
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(3, 3, 3),
+                        use_self_gating=True),
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(3, 1, 3, 1),
+                        use_self_gating=True),
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(3, 1, 3, 1, 3, 1),
+                        use_self_gating=True),
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(1, 3, 1),
+                        use_self_gating=True))
+
+
+@dataclasses.dataclass
+class ResNet3DRS(ResNet3D):
+  """Block specifications of the ResNet-RS (3D) model.
+
+  Compared with the `ResNet3D` defaults this preset changes `stem_type`,
+  `stochastic_depth_drop_rate`, `se_ratio` and `block_specs`.
+  """
+  model_id: int = 50
+  stem_type: str = 'v1'
+  # The three stem_* fields below repeat the `ResNet3D` default values.
+  stem_conv_temporal_kernel_size: int = 5
+  stem_conv_temporal_stride: int = 2
+  stem_pool_temporal_stride: int = 2
+  stochastic_depth_drop_rate: float = 0.1
+  se_ratio: float = 0.2
+  # Four block specs, all with temporal stride 1 and self-gating enabled; the
+  # first two use a temporal kernel size of 1 and the last two a size of 3.
+  block_specs: Tuple[
+      ResNet3DBlock, ResNet3DBlock, ResNet3DBlock, ResNet3DBlock] = (
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(1,),
+                        use_self_gating=True),
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(1,),
+                        use_self_gating=True),
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(3,),
+                        use_self_gating=True),
+          ResNet3DBlock(temporal_strides=1,
+                        temporal_kernel_sizes=(3,),
+                        use_self_gating=True))
+
+
+# Module-level default instances of the ResNet3D50 and ResNet3DRS presets.
+_RESNET3D50_DEFAULT_CFG = ResNet3D50()
+_RESNET3DRS_DEFAULT_CFG = ResNet3DRS()
+
+
+@dataclasses.dataclass
+class Backbone3D(hyperparams.OneOfConfig):
+  """Configuration for backbones.
+
+  Attributes:
+    type: 'str', type of backbone be used, one of the fields below.
+    resnet_3d: resnet3d backbone config.
+    resnet_3d_rs: resnet3d-rs backbone config.
+  """
+  type: Optional[str] = None
+  resnet_3d: ResNet3D = _RESNET3D50_DEFAULT_CFG
+  resnet_3d_rs: ResNet3D = _RESNET3DRS_DEFAULT_CFG
--- a/official/vision/configs/common.py
+++ b/official/vision/configs/common.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Common configurations."""
+
+import dataclasses
+from typing import List, Optional
+
+# Import libraries
+
+from official.core import config_definitions as cfg
+from official.modeling import hyperparams
+
+
+@dataclasses.dataclass
+class TfExampleDecoder(hyperparams.Config):
+  """A simple TF Example decoder config."""
+  regenerate_source_id: bool = False
+  # None by default, i.e. no binarization threshold is configured.
+  mask_binarize_threshold: Optional[float] = None
+
+
+@dataclasses.dataclass
+class TfExampleDecoderLabelMap(hyperparams.Config):
+  """TF Example decoder with label map config.
+
+  Has the same fields as `TfExampleDecoder` plus `label_map`.
+  """
+  regenerate_source_id: bool = False
+  mask_binarize_threshold: Optional[float] = None
+  # NOTE(review): presumably a path to the label map file; empty by default --
+  # confirm against the decoder implementation.
+  label_map: str = ''
+
+
+@dataclasses.dataclass
+class DataDecoder(hyperparams.OneOfConfig):
+  """Data decoder config.
+
+  Attributes:
+    type: 'str', type of data decoder be used, one of the fields below.
+    simple_decoder: simple TF Example decoder config.
+    label_map_decoder: TF Example decoder with label map config.
+  """
+  type: Optional[str] = 'simple_decoder'
+  simple_decoder: TfExampleDecoder = TfExampleDecoder()
+  label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()
+
+
+@dataclasses.dataclass
+class RandAugment(hyperparams.Config):
+  """Configuration for RandAugment."""
+  num_layers: int = 2
+  magnitude: float = 10
+  cutout_const: float = 40
+  translate_const: float = 10
+  magnitude_std: float = 0.0
+  # None by default, i.e. no explicit application probability is configured.
+  prob_to_apply: Optional[float] = None
+  # A fresh empty list per instance (default_factory avoids a shared list).
+  exclude_ops: List[str] = dataclasses.field(default_factory=list)
+
+
+@dataclasses.dataclass
+class AutoAugment(hyperparams.Config):
+  """Configuration for AutoAugment."""
+  # Name of the augmentation policy; defaults to 'v0'.
+  augmentation_name: str = 'v0'
+  cutout_const: float = 100
+  translate_const: float = 250
+
+
+@dataclasses.dataclass
+class RandomErasing(hyperparams.Config):
+  """Configuration for RandomErasing."""
+  probability: float = 0.25
+  min_area: float = 0.02
+  max_area: float = 1 / 3
+  min_aspect: float = 0.3
+  max_aspect = None
+  min_count = 1
+  max_count = 1
+  trials = 10
+
+
+@dataclasses.dataclass
+class MixupAndCutmix(hyperparams.Config):
+  """Configuration for MixupAndCutmix."""
+  mixup_alpha: float = .8
+  cutmix_alpha: float = 1.
+  prob: float = 1.0
+  # NOTE(review): presumably the probability of choosing between mixup and
+  # cutmix -- confirm against the augmentation implementation.
+  switch_prob: float = 0.5
+  label_smoothing: float = 0.1
+
+
+@dataclasses.dataclass
+class Augmentation(hyperparams.OneOfConfig):
+  """Configuration for input data augmentation.
+
+  Attributes:
+    type: 'str', type of augmentation be used, one of the fields below.
+    randaug: RandAugment config.
+    autoaug: AutoAugment config.
+  """
+  type: Optional[str] = None
+  randaug: RandAugment = RandAugment()
+  autoaug: AutoAugment = AutoAugment()
+
+
+@dataclasses.dataclass
+class NormActivation(hyperparams.Config):
+  """Normalization and activation config."""
+  # Activation name; the experiment YAMLs override it with e.g. 'swish'.
+  activation: str = 'relu'
+  use_sync_bn: bool = True
+  norm_momentum: float = 0.99
+  norm_epsilon: float = 0.001
+
+
+@dataclasses.dataclass
+class PseudoLabelDataConfig(cfg.DataConfig):
+  """Pseudo-label input config for training."""
+  input_path: str = ''
+  data_ratio: float = 1.0  # Per-batch ratio of pseudo-labeled to labeled data.
+  is_training: bool = True
+  dtype: str = 'float32'
+  shuffle_buffer_size: int = 10000
+  cycle_length: int = 10
+  aug_rand_hflip: bool = True
+  aug_type: Optional[
+      Augmentation] = None  # Choose from AutoAugment and RandAugment.
+  file_type: str = 'tfrecord'
+
+  # Kept for backward compatibility.
+  aug_policy: Optional[str] = None  # None, 'autoaug', or 'randaug'.
+  randaug_magnitude: Optional[int] = 10
+
+
+@dataclasses.dataclass
+class TFLitePostProcessingConfig(hyperparams.Config):
+  """TFLite post-processing config (detection limits and NMS settings)."""
+  max_detections: int = 200
+  max_classes_per_detection: int = 5
+  # Regular NMS runs in a multi-class fashion and is slow. Setting this to
+  # False uses class-agnostic NMS, which is faster.
+  use_regular_nms: bool = False
+  nms_score_threshold: float = 0.1
+  nms_iou_threshold: float = 0.5
--- a/official/vision/configs/decoders.py
+++ b/official/vision/configs/decoders.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Decoders configurations."""
+import dataclasses
+from typing import List, Optional
+
+# Import libraries
+
+from official.modeling import hyperparams
+
+
+@dataclasses.dataclass
+class Identity(hyperparams.Config):
+  """Identity config."""
+  pass
+
+
+@dataclasses.dataclass
+class FPN(hyperparams.Config):
+  """FPN decoder config."""
+  num_filters: int = 256
+  # Name of the feature fusion method; defaults to 'sum'.
+  fusion_type: str = 'sum'
+  use_separable_conv: bool = False
+
+
+@dataclasses.dataclass
+class NASFPN(hyperparams.Config):
+  """NASFPN decoder config."""
+  num_filters: int = 256
+  num_repeats: int = 5
+  use_separable_conv: bool = False
+
+
+@dataclasses.dataclass
+class ASPP(hyperparams.Config):
+  """ASPP decoder config."""
+  level: int = 4
+  # A fresh empty list per instance (default_factory avoids a shared list).
+  dilation_rates: List[int] = dataclasses.field(default_factory=list)
+  dropout_rate: float = 0.0
+  num_filters: int = 256
+  use_depthwise_convolution: bool = False
+  pool_kernel_size: Optional[List[int]] = None  # Use global average pooling.
+  spp_layer_version: str = 'v1'
+  output_tensor: bool = False
+
+
+@dataclasses.dataclass
+class Decoder(hyperparams.OneOfConfig):
+  """Configuration for decoders.
+
+  Attributes:
+    type: 'str', type of decoder be used, one of the fields below.
+    fpn: fpn config.
+  """
+  type: Optional[str] = None
+  fpn: FPN = FPN()
+  nasfpn: NASFPN = NASFPN()
+  identity: Identity = Identity()
+  aspp: ASPP = ASPP()
--- a/official/vision/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml
+# MobileNetV2_1.0 ImageNet classification. 71.0% top-1 and 90.0% top-5 accuracy.
+# GPU variant: mirrored distribution strategy, float16 mixed precision and
+# dynamic loss scaling.
+runtime:
+  distribution_strategy: 'mirrored'
+  mixed_precision_dtype: 'float16'
+  loss_scale: 'dynamic'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'mobilenet'
+      mobilenet:
+        model_id: 'MobileNetV2'
+        filter_size_scale: 1.0
+    dropout_rate: 0.2
+  losses:
+    l2_weight_decay: 0.00001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 1024  # 128 * 8
+    dtype: 'float16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 1024  # 128 * 8
+    dtype: 'float16'
+    drop_remainder: false
+trainer:
+  train_steps: 625500  # 500 epochs
+  validation_steps: 49  # Up to 49 * 1024 = 50176 examples per evaluation.
+  # Validation, summaries and checkpoints all run once per epoch (1251 steps).
+  validation_interval: 1251
+  steps_per_loop: 1251  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 1251
+  checkpoint_interval: 1251
+  optimizer_config:
+    learning_rate:
+      type: 'exponential'
+      exponential:
+        initial_learning_rate: 0.064  # 0.008 * batch_size / 128
+        decay_steps: 3127  # 2.5 * steps_per_epoch
+        decay_rate: 0.96
+        staircase: true
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 6255  # 5 * steps_per_epoch
--- a/official/vision/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml
+# MobileNetV2_1.0 ImageNet classification. 72.72% top-1 and 91.05% top-5 accuracy.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'mobilenet'
+      mobilenet:
+        model_id: 'MobileNetV2'
+        filter_size_scale: 1.0
+    dropout_rate: 0.2
+  losses:
+    l2_weight_decay: 0.00001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 156000  # 500 epochs
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run once per epoch (312 steps).
+  validation_interval: 312
+  steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    learning_rate:
+      type: 'exponential'
+      exponential:
+        initial_learning_rate: 0.256  # 0.008 * batch_size / 128
+        decay_steps: 780  # 2.5 * steps_per_epoch
+        decay_rate: 0.96
+        staircase: true
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * steps_per_epoch
--- a/official/vision/configs/experiments/image_classification/imagenet_mobilenetv3large_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_mobilenetv3large_tpu.yaml
+# MobileNetV3-large_1.0 ImageNet classification: 74.96% top-1.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'mobilenet'
+      mobilenet:
+        model_id: 'MobileNetV3Large'
+        filter_size_scale: 1.0
+    dropout_rate: 0.2
+  losses:
+    l2_weight_decay: 0.00001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    # Enables Inception-style pre-processing.
+    decode_jpeg_only: false
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+    # Enables Inception-style pre-processing.
+    decode_jpeg_only: false
+trainer:
+  train_steps: 156000  # 500 epochs
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run once per epoch (312 steps).
+  validation_interval: 312
+  steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        alpha: 0.0
+        decay_steps: 156000  # Same as train_steps.
+        initial_learning_rate: 0.5
+        name: CosineDecay
+        offset: 0
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 5000
--- a/official/vision/configs/experiments/image_classification/imagenet_mobilenetv3small_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_mobilenetv3small_tpu.yaml
+# MobileNetV3Small ImageNet classification. 67.5% top-1 and 87.6% top-5 accuracy.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision,
+# RMSprop optimizer and EMA of the weights.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'mobilenet'
+      mobilenet:
+        model_id: 'MobileNetV3Small'
+        filter_size_scale: 1.0
+    norm_activation:
+      activation: 'relu'
+      norm_momentum: 0.997
+      norm_epsilon: 0.001
+      use_sync_bn: false
+    dropout_rate: 0.2
+  losses:
+    l2_weight_decay: 0.00001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 312000  # 1000 epochs
+  # NOTE(review): 12 * 4096 = 49152 examples per evaluation. The sibling TPU
+  # configs with the same global_batch_size use 13 steps -- confirm that 12 is
+  # intended.
+  validation_steps: 12
+  validation_interval: 312
+  steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'rmsprop'
+      rmsprop:
+        rho: 0.9
+        momentum: 0.9
+        epsilon: 0.002
+    learning_rate:
+      type: 'exponential'
+      exponential:
+        initial_learning_rate: 0.01
+        decay_steps: 936  # 3 * steps_per_epoch
+        decay_rate: 0.99
+        staircase: true
+    ema:
+      average_decay: 0.9999
+      trainable_weights_only: false
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * steps_per_epoch
+        warmup_learning_rate: 0.001
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet101_deeplab_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet101_deeplab_tpu.yaml
+# Dilated ResNet-101 (DeepLab-style backbone) ImageNet classification.
+# Top-1 accuracy 81.6% on ImageNet
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'dilated_resnet'
+      dilated_resnet:
+        model_id: 101
+        output_stride: 16
+        stem_type: 'v1'
+        se_ratio: 0.25
+        stochastic_depth_drop_rate: 0.2
+        multigrid: [1, 2, 4]
+        last_stage_repeats: 1
+    norm_activation:
+      activation: 'swish'
+  losses:
+    l2_weight_decay: 0.00004
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    aug_policy: 'randaug'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 109200  # 350 * 312 steps
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 312 steps.
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 109200  # Same as train_steps.
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * 312 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml
+# ResNet-101 ImageNet classification. 79.1% top-1 and 94.5% top-5 accuracy.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 101
+    norm_activation:
+      activation: 'swish'
+  losses:
+    l2_weight_decay: 0.0001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 62400  # 200 * 312 steps
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 312 steps.
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 62400  # Same as train_steps.
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * 312 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml
+# ResNet-152 ImageNet classification. 79.4% top-1 and 94.7% top-5 accuracy.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 152
+    norm_activation:
+      activation: 'swish'
+  losses:
+    l2_weight_decay: 0.0001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 62400  # 200 * 312 steps
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 312 steps.
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 62400  # Same as train_steps.
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * 312 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet50_deeplab_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet50_deeplab_tpu.yaml
+# Dilated ResNet-50 (DeepLab-style backbone) ImageNet classification.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'dilated_resnet'
+      dilated_resnet:
+        model_id: 50
+        output_stride: 16
+    norm_activation:
+      activation: 'swish'
+  losses:
+    l2_weight_decay: 0.0001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 62400  # 200 * 312 steps
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 312 steps.
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 62400  # Same as train_steps.
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * 312 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet50_gpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet50_gpu.yaml
+# ResNet-50 ImageNet classification.
+# GPU variant: mirrored distribution strategy, float16 mixed precision and
+# dynamic loss scaling.
+runtime:
+  distribution_strategy: 'mirrored'
+  mixed_precision_dtype: 'float16'
+  loss_scale: 'dynamic'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 50
+  losses:
+    l2_weight_decay: 0.0001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 2048
+    dtype: 'float16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 2048
+    dtype: 'float16'
+    drop_remainder: false
+trainer:
+  train_steps: 56160
+  validation_steps: 25  # Up to 25 * 2048 = 51200 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 625 steps.
+  validation_interval: 625
+  steps_per_loop: 625
+  summary_interval: 625
+  checkpoint_interval: 625
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'stepwise'
+      stepwise:
+        # Three boundaries and four values; each value is one tenth of the
+        # previous one.
+        boundaries: [18750, 37500, 50000]
+        values: [0.8, 0.08, 0.008, 0.0008]
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 3125  # 5 * 625 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet50_tfds_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet50_tfds_tpu.yaml
+# ResNet-50 ImageNet classification. 78.1% top-1 and 93.9% top-5 accuracy.
+# Reads data through TFDS ('imagenet2012') instead of an input_path.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 50
+    norm_activation:
+      activation: 'swish'
+  losses:
+    l2_weight_decay: 0.0001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    # input_path is left empty; the dataset is named by tfds_name/tfds_split.
+    input_path: ''
+    tfds_name: 'imagenet2012'
+    tfds_split: 'train'
+    sharding: true
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    # input_path is left empty; the dataset is named by tfds_name/tfds_split.
+    input_path: ''
+    tfds_name: 'imagenet2012'
+    tfds_split: 'validation'
+    sharding: true
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 62400  # 200 * 312 steps
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 312 steps.
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 62400  # Same as train_steps.
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * 312 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml
+# ResNet-50 ImageNet classification.
+# TPU variant: 'tpu' distribution strategy with bfloat16 mixed precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 50
+  losses:
+    l2_weight_decay: 0.0001
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 28080  # 90 * 312 steps
+  validation_steps: 13  # Up to 13 * 4096 = 53248 examples per evaluation.
+  # Validation, summaries and checkpoints all run every 312 steps.
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'stepwise'
+      stepwise:
+        # Boundaries are 30, 60 and 80 times 312 steps; each value is one
+        # tenth of the previous one.
+        boundaries: [9360, 18720, 24960]
+        values: [1.6, 0.16, 0.016, 0.0016]
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560  # 5 * 312 steps
--- a/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml
+# ResNet-RS-101 ImageNet classification at 160x160 input resolution. 80.2% top-1 accuracy.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [160, 160, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 101
+        replace_stem_max_pool: true
+        resnetd_shortcut: true
+        se_ratio: 0.25
+        stem_type: 'v1'
+        stochastic_depth_drop_rate: 0.0
+    norm_activation:
+      activation: 'swish'
+      norm_momentum: 0.0
+      use_sync_bn: false
+    dropout_rate: 0.25
+  losses:
+    l2_weight_decay: 0.00004
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    aug_type:
+      type: 'randaug'
+      randaug:
+        magnitude: 15
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 109200
+  validation_steps: 13
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    ema:
+      average_decay: 0.9999
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 109200
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560
--- a/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml
+# ResNet-RS-101 ImageNet classification at 192x192 input resolution. 81.3% top-1 accuracy.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [192, 192, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 101
+        replace_stem_max_pool: true
+        resnetd_shortcut: true
+        se_ratio: 0.25
+        stem_type: 'v1'
+        stochastic_depth_drop_rate: 0.0
+    norm_activation:
+      activation: 'swish'
+      norm_momentum: 0.0
+      use_sync_bn: false
+    dropout_rate: 0.25
+  losses:
+    l2_weight_decay: 0.00004
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    aug_type:
+      type: 'randaug'
+      randaug:
+        magnitude: 15
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 109200
+  validation_steps: 13
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    ema:
+      average_decay: 0.9999
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 109200
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560
--- a/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml
+# ResNet-RS-152 ImageNet classification at 192x192 input resolution. 81.9% top-1 accuracy.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [192, 192, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 152
+        replace_stem_max_pool: true
+        resnetd_shortcut: true
+        se_ratio: 0.25
+        stem_type: 'v1'
+        stochastic_depth_drop_rate: 0.0
+    norm_activation:
+      activation: 'swish'
+      norm_momentum: 0.0
+      use_sync_bn: false
+    dropout_rate: 0.25
+  losses:
+    l2_weight_decay: 0.00004
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    aug_type:
+      type: 'randaug'
+      randaug:
+        magnitude: 15
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 109200
+  validation_steps: 13
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    ema:
+      average_decay: 0.9999
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 109200
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560
--- a/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml
+++ b/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml
+# ResNet-RS-152 ImageNet classification at 224x224 input resolution. 82.5% top-1 accuracy.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [224, 224, 3]
+    backbone:
+      type: 'resnet'
+      resnet:
+        model_id: 152
+        replace_stem_max_pool: true
+        resnetd_shortcut: true
+        se_ratio: 0.25
+        stem_type: 'v1'
+        stochastic_depth_drop_rate: 0.0
+    norm_activation:
+      activation: 'swish'
+      norm_momentum: 0.0
+      use_sync_bn: false
+    dropout_rate: 0.25
+  losses:
+    l2_weight_decay: 0.00004
+    one_hot: true
+    label_smoothing: 0.1
+  train_data:
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    aug_type:
+      type: 'randaug'
+      randaug:
+        magnitude: 15
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  train_steps: 109200
+  validation_steps: 13
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    ema:
+      average_decay: 0.9999
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'cosine'
+      cosine:
+        initial_learning_rate: 1.6
+        decay_steps: 109200
+    warmup:
+      type: 'linear'
+      linear:
+        warmup_steps: 1560