Commit e4be7e00 authored by Yeqing Li, committed by A. Unique TensorFlower

Removes unneeded content of the beta folder.

PiperOrigin-RevId: 437276665
parent f47405b5
# SpineNet-49 COCO detection with protocol C config. Expecting 44.2% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '49'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [640, 640, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
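#
# A sketch of how a config file like this is typically launched (the experiment
# name is registered in retinanet.py later in this commit; the config_file,
# model_dir, and tpu values are placeholders, not part of this commit):
#
# python3 -m official.vision.beta.train \
#   --experiment=retinanet_spinenet_coco \
#   --config_file=coco_spinenet49_tpu.yaml \
#   --mode=train_and_eval \
#   --model_dir=gs://my-bucket/spinenet49 \
#   --tpu=my-tpu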
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49S'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 40
      use_separable_conv: true
    input_size: [384, 384, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49XS'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 24
      use_separable_conv: true
    input_size: [256, 256, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
# SpineNet-96 COCO detection with protocol C config. Expecting 48.5% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '96'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1024, 1024, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: ''  # Can't use annotation file when tfds is used.
  losses:
    l2_weight_decay: 0.0001
  model:
    num_classes: 91
    max_level: 7
    min_level: 3
    input_size: [640, 640, 3]
    norm_activation:
      activation: relu
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    tfds_name: 'coco/2017'
    tfds_split: 'train'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 256
    input_path: ''
    is_training: true
    shuffle_buffer_size: 1000
  validation_data:
    tfds_name: 'coco/2017'
    tfds_split: 'validation'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 8
    input_path: ''
    is_training: false
# Benchmark runs on the same instance; change the eval batch size to fit on a 4x4 TPU.
task:
  validation_data:
    global_batch_size: 32
trainer:
  validation_interval: 1560
  validation_steps: 156
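#
# A sketch of how a small override file like this is layered on top of a base
# config (assuming the launcher accepts --config_file multiple times, with
# later files overriding earlier ones; all names below are placeholders):
#
# python3 -m official.vision.beta.train \
#   --experiment=retinanet_resnetfpn_coco \
#   --config_file=base_config.yaml \
#   --config_file=benchmark_override.yaml \
#   --mode=train_and_eval \
#   --model_dir=/tmp/retinanet_benchmark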
# Use your own preprocessed Cityscapes dataset. Expecting 79% mean IoU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 19
    input_size: [null, null, 3]
    backbone:
      type: 'dilated_resnet'
      dilated_resnet:
        model_id: 101
        output_stride: 16
        stem_type: 'v1'
        se_ratio: 0.25
        stochastic_depth_drop_rate: 0.2
        multigrid: [1, 2, 4]
        last_stage_repeats: 1
    decoder:
      aspp:
        pool_kernel_size: [512, 1024]
    head:
      feature_fusion: 'deeplabv3plus'
      low_level: 2
      low_level_num_filters: 48
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  losses:
    top_k_percent_pixels: 1.0  # Only backpropagate loss for the top 100% of pixels, i.e., all pixels.
  train_data:
    output_size: [1024, 2048]
    crop_size: [512, 1024]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'train'
    is_training: true
    global_batch_size: 16
    dtype: 'float32'
    aug_rand_hflip: true
    aug_scale_max: 2.0
    aug_scale_min: 0.5
  validation_data:
    output_size: [1024, 2048]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'validation'
    is_training: false
    global_batch_size: 16
    dtype: 'float32'
    drop_remainder: false
    resize_eval_groundtruth: true
trainer:
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 90000
        initial_learning_rate: 0.01
        power: 0.9
      type: polynomial
    optimizer:
      sgd:
        momentum: 0.9
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 925
      type: linear
  steps_per_loop: 185
  summary_interval: 185
  train_steps: 90000
  validation_interval: 185
  validation_steps: 31
  checkpoint_interval: 185
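#
# Schedule sanity check (a sketch; assumes the standard 2,975-image Cityscapes
# training split): 2,975 images / 16 per batch gives about 185 steps per epoch,
# matching steps_per_loop: 185, so train_steps: 90000 is roughly 485 epochs and
# warmup_steps: 925 is 5 epochs.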
# 3D ResNet-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 77.0% top-1, 93.0% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
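#
# Schedule sanity check (a sketch, using the Kinetics-400 example count listed
# in the ResNet-RS config below): 215,570 train examples / 1,024 per batch is
# about 210 steps per epoch, so train_steps: 42104 is roughly 200 epochs and
# warmup_steps: 1053 is roughly 5 epochs.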
# 3D ResNet-RS-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 78.2% top-1.
runtime:
  mixed_precision_dtype: bfloat16
task:
  losses:
    l2_weight_decay: 0.00004
    label_smoothing: 0.1
    one_hot: true
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d_rs:
        model_id: 50
        stem_type: 'v1'
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
        stochastic_depth_drop_rate: 0.1
        se_ratio: 0.25
      type: resnet_3d_rs
    dropout_rate: 0.5
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.0
      use_sync_bn: false
  train_data:
    data_format: channels_last
    drop_remainder: true
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    file_type: sstable
    global_batch_size: 1024
    is_training: true
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 215570
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
    temporal_stride: 2
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    data_format: channels_last
    drop_remainder: false
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    file_type: sstable
    global_batch_size: 64
    is_training: false
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 17706
    num_test_clips: 10
    num_test_crops: 3
    one_hot: true
    temporal_stride: 2
trainer:
  checkpoint_interval: 210
  max_to_keep: 3
  optimizer_config:
    ema:
      average_decay: 0.9999
      trainable_weights_only: false
    learning_rate:
      cosine:
        decay_steps: 73682
        initial_learning_rate: 0.8
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1050
      type: linear
  train_steps: 73682
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# SlowOnly 16x4 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 75.6% top-1, 92.1% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 224
    - 224
    - 3
    temporal_stride: 4
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 256
    - 256
    - 3
    temporal_stride: 4
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# SlowOnly 8x8 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 74.1% top-1, 91.4% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# 3D ResNet-50 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 79.5% top-1, 94.8% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# 3D ResNet-50g video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 78.7% top-1, 93.6% top-5.
# Train on TPU: v3-128, eval on TPU: v3-32.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  init_checkpoint: null
  init_checkpoint_modules: all
  losses:
    l2_weight_decay: 0.0001
    label_smoothing: 0.0
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 2
        stem_type: v0
        stochastic_depth_drop_rate: 0.0
      type: resnet_3d
    dropout_rate: 0.2
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.9
      use_sync_bn: false
  train_data:
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.49
    aug_min_aspect_ratio: 0.5
    drop_remainder: true
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 64
    - 224
    - 224
    - 3
    global_batch_size: 1024
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    split: train
  validation_data:
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 250
    - 224
    - 224
    - 3
    global_batch_size: 64
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    num_examples: 27780
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        alpha: 0.0
        decay_steps: 71400
        initial_learning_rate: 1.6
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1785
      type: linear
  train_steps: 71400
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# SlowOnly 8x8 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 77.3% top-1, 93.6% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image classification configuration definition."""
import dataclasses
import os
from typing import List, Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import backbones
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  is_multilabel: bool = False
  aug_rand_hflip: bool = True
  aug_type: Optional[
      common.Augmentation] = None  # Choose from AutoAugment and RandAugment.
  color_jitter: float = 0.
  random_erasing: Optional[common.RandomErasing] = None
  file_type: str = 'tfrecord'
  image_field_key: str = 'image/encoded'
  label_field_key: str = 'image/class/label'
  decode_jpeg_only: bool = True
  mixup_and_cutmix: Optional[common.MixupAndCutmix] = None
  decoder: Optional[common.DataDecoder] = common.DataDecoder()
  # Keep for backward compatibility.
  aug_policy: Optional[str] = None  # None, 'autoaug', or 'randaug'.
  randaug_magnitude: Optional[int] = 10


@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
  """The model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  # Adds a BatchNormalization layer before GlobalAveragePooling in the
  # classification head.
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'random_uniform'


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  one_hot: bool = True
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0
  soft_labels: bool = False


@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  top_k: int = 5


@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
  """The task config."""
  model: ImageClassificationModel = ImageClassificationModel()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  evaluation: Evaluation = Evaluation()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: str = 'all'  # all or backbone
  model_output_keys: Optional[List[int]] = dataclasses.field(
      default_factory=list)


@exp_factory.register_config_factory('image_classification')
def image_classification() -> cfg.ExperimentConfig:
  """Image classification general."""
  return cfg.ExperimentConfig(
      task=ImageClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


IMAGENET_TRAIN_EXAMPLES = 1281167
IMAGENET_VAL_EXAMPLES = 50000
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'


@exp_factory.register_config_factory('resnet_imagenet')
def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [
                          0.1 * train_batch_size / 256,
                          0.01 * train_batch_size / 256,
                          0.001 * train_batch_size / 256,
                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
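

# Example usage (a sketch; the override value is illustrative only):
#
#   config = exp_factory.get_exp_config('resnet_imagenet')
#   config.task.train_data.global_batch_size = 1024
#   config.validate()  # Re-checks the `restrictions` declared above.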
@exp_factory.register_config_factory('resnet_rs_imagenet')
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet-rs."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[160, 160, 3],
              backbone=backbones.Backbone(
                  type='resnet',
                  resnet=backbones.ResNet(
                      model_id=50,
                      stem_type='v1',
                      resnetd_shortcut=True,
                      replace_stem_max_pool=True,
                      se_ratio=0.25,
                      stochastic_depth_drop_rate=0.0)),
              dropout_rate=0.25,
              norm_activation=common.NormActivation(
                  norm_momentum=0.0,
                  norm_epsilon=1e-5,
                  use_sync_bn=False,
                  activation='swish')),
          losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_type=common.Augmentation(
                  type='randaug', randaug=common.RandAugment(magnitude=10))),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=350 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('revnet_imagenet')
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
  """Returns a revnet config for image classification on imagenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='revnet', revnet=backbones.RevNet(model_id=56)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False),
              add_head_batch_norm=True),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [0.8, 0.08, 0.008, 0.0008]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with mobilenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              dropout_rate=0.2,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', filter_size_scale=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.002,
                  }
              },
              'learning_rate': {
                  'type': 'exponential',
                  'exponential': {
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      'decay_steps':
                          int(2.5 * steps_per_epoch),
                      'decay_rate':
                          0.98,
                      'staircase':
                          True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
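

# Learning-rate sanity check (a sketch): with train_batch_size = 4096, the
# exponential schedule above starts at 0.008 * (4096 // 128) = 0.256 and, with
# staircase=True, decays by 2% every int(2.5 * steps_per_epoch) steps, i.e.
# every 2.5 epochs.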
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for image_classification."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision import beta
from official.vision.beta.configs import image_classification as exp_cfg
class ImageClassificationConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('resnet_imagenet',),
      ('resnet_rs_imagenet',),
      ('revnet_imagenet',),
      ('mobilenet_imagenet',),
  )
  def test_image_classification_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.ImageClassificationTask)
    self.assertIsInstance(config.task.model,
                          exp_cfg.ImageClassificationModel)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaises(KeyError):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""R-CNN(-RS) configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import backbones
# pylint: disable=missing-class-docstring


@dataclasses.dataclass
class Parser(hyperparams.Config):
  num_channels: int = 3
  match_threshold: float = 0.5
  unmatched_threshold: float = 0.5
  aug_rand_hflip: bool = False
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  skip_crowd_during_training: bool = True
  max_num_instances: int = 100
  rpn_match_threshold: float = 0.7
  rpn_unmatched_threshold: float = 0.3
  rpn_batch_size_per_im: int = 256
  rpn_fg_fraction: float = 0.5
  mask_crop_size: int = 112


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = False
  dtype: str = 'bfloat16'
  decoder: common.DataDecoder = common.DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  file_type: str = 'tfrecord'
  drop_remainder: bool = True
  # Number of examples in the dataset; used to create the annotation file.
  num_examples: int = -1


@dataclasses.dataclass
class Anchor(hyperparams.Config):
  num_scales: int = 1
  aspect_ratios: List[float] = dataclasses.field(
      default_factory=lambda: [0.5, 1.0, 2.0])
  anchor_size: float = 8.0


@dataclasses.dataclass
class RPNHead(hyperparams.Config):
  num_convs: int = 1
  num_filters: int = 256
  use_separable_conv: bool = False


@dataclasses.dataclass
class DetectionHead(hyperparams.Config):
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  num_fcs: int = 1
  fc_dims: int = 1024
  class_agnostic_bbox_pred: bool = False  # Has to be True for Cascade RCNN.
  # If additional IoUs are passed in 'cascade_iou_thresholds',
  # then ensemble the class probabilities from all heads.
  cascade_class_ensemble: bool = False


@dataclasses.dataclass
class ROIGenerator(hyperparams.Config):
  pre_nms_top_k: int = 2000
  pre_nms_score_threshold: float = 0.0
  pre_nms_min_size_threshold: float = 0.0
  nms_iou_threshold: float = 0.7
  num_proposals: int = 1000
  test_pre_nms_top_k: int = 1000
  test_pre_nms_score_threshold: float = 0.0
  test_pre_nms_min_size_threshold: float = 0.0
  test_nms_iou_threshold: float = 0.7
  test_num_proposals: int = 1000
  use_batched_nms: bool = False


@dataclasses.dataclass
class ROISampler(hyperparams.Config):
  mix_gt_boxes: bool = True
  num_sampled_rois: int = 512
  foreground_fraction: float = 0.25
  foreground_iou_threshold: float = 0.5
  background_iou_high_threshold: float = 0.5
  background_iou_low_threshold: float = 0.0
  # IoU thresholds for additional FRCNN heads in Cascade mode.
  # `foreground_iou_threshold` is the first threshold.
  cascade_iou_thresholds: Optional[List[float]] = None


@dataclasses.dataclass
class ROIAligner(hyperparams.Config):
  crop_size: int = 7
  sample_offset: float = 0.5


@dataclasses.dataclass
class DetectionGenerator(hyperparams.Config):
  apply_nms: bool = True
  pre_nms_top_k: int = 5000
  pre_nms_score_threshold: float = 0.05
  nms_iou_threshold: float = 0.5
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`
  use_cpu_nms: bool = False
  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


@dataclasses.dataclass
class MaskHead(hyperparams.Config):
  upsample_factor: int = 2
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  class_agnostic: bool = False


@dataclasses.dataclass
class MaskSampler(hyperparams.Config):
  num_sampled_masks: int = 128


@dataclasses.dataclass
class MaskROIAligner(hyperparams.Config):
  crop_size: int = 14
  sample_offset: float = 0.5


@dataclasses.dataclass
class MaskRCNN(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 2
  max_level: int = 6
  anchor: Anchor = Anchor()
  include_mask: bool = True
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(
      type='fpn', fpn=decoders.FPN())
  rpn_head: RPNHead = RPNHead()
  detection_head: DetectionHead = DetectionHead()
  roi_generator: ROIGenerator = ROIGenerator()
  roi_sampler: ROISampler = ROISampler()
  roi_aligner: ROIAligner = ROIAligner()
  detection_generator: DetectionGenerator = DetectionGenerator()
  mask_head: Optional[MaskHead] = MaskHead()
  mask_sampler: Optional[MaskSampler] = MaskSampler()
  mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.997,
      norm_epsilon=0.0001,
      use_sync_bn=True)


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  rpn_huber_loss_delta: float = 1. / 9.
  frcnn_huber_loss_delta: float = 1.
  l2_weight_decay: float = 0.0
  rpn_score_weight: float = 1.0
  rpn_box_weight: float = 1.0
  frcnn_class_weight: float = 1.0
  frcnn_box_weight: float = 1.0
  mask_weight: float = 1.0


@dataclasses.dataclass
class MaskRCNNTask(cfg.TaskConfig):
  model: MaskRCNN = MaskRCNN()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False,
                                           drop_remainder=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  annotation_file: Optional[str] = None
  per_category_metrics: bool = False
  # If set, we only use masks for the specified class IDs.
  allowed_mask_class_ids: Optional[List[int]] = None
  # If set, the COCO metrics will be computed.
  use_coco_metrics: bool = True
  # If set, the Waymo Open Dataset evaluator will be used.
  use_wod_metrics: bool = False


COCO_INPUT_PATH_BASE = 'coco'


@exp_factory.register_config_factory('fasterrcnn_resnetfpn_coco')
def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Faster R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              include_mask=False,
              mask_head=None,
              mask_sampler=None,
              mask_roi_aligner=None),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('maskrcnn_resnetfpn_coco')
def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91, input_size=[1024, 1024, 3], include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('maskrcnn_spinenet_coco')
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(use_sync_bn=True),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 320, steps_per_epoch * 340
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config


@exp_factory.register_config_factory('cascadercnn_spinenet_coco')
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
              detection_head=DetectionHead(
                  class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 475, steps_per_epoch * 490
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config
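

# Example (a sketch) of how the min/max_level restrictions above surface when a
# config is edited inconsistently:
#
#   config = exp_factory.get_exp_config('maskrcnn_spinenet_coco')
#   config.task.model.min_level = 2  # Disagrees with backbone.spinenet.min_level.
#   config.validate()  # Raises KeyError for the violated restriction.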
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for maskrcnn."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision import beta
from official.vision.beta.configs import maskrcnn as exp_cfg
class MaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('fasterrcnn_resnetfpn_coco',),
      ('maskrcnn_resnetfpn_coco',),
      ('maskrcnn_spinenet_coco',),
      ('cascadercnn_spinenet_coco',),
  )
  def test_maskrcnn_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.MaskRCNNTask)
    self.assertIsInstance(config.task.model, exp_cfg.MaskRCNN)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""RetinaNet configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import backbones
# pylint: disable=missing-class-docstring


# Keep for backward compatibility.
@dataclasses.dataclass
class TfExampleDecoder(common.TfExampleDecoder):
  """A simple TF Example decoder config."""


# Keep for backward compatibility.
@dataclasses.dataclass
class TfExampleDecoderLabelMap(common.TfExampleDecoderLabelMap):
  """TF Example decoder with label map config."""


# Keep for backward compatibility.
@dataclasses.dataclass
class DataDecoder(common.DataDecoder):
  """Data decoder config."""


@dataclasses.dataclass
class Parser(hyperparams.Config):
  num_channels: int = 3
  match_threshold: float = 0.5
  unmatched_threshold: float = 0.5
  aug_rand_hflip: bool = False
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  skip_crowd_during_training: bool = True
  max_num_instances: int = 100
  # Can choose AutoAugment and RandAugment.
  aug_type: Optional[common.Augmentation] = None
  # Keep for backward compatibility. Not used.
  aug_policy: Optional[str] = None


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = False
  dtype: str = 'bfloat16'
  decoder: common.DataDecoder = common.DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  file_type: str = 'tfrecord'


@dataclasses.dataclass
class Anchor(hyperparams.Config):
  num_scales: int = 3
  aspect_ratios: List[float] = dataclasses.field(
      default_factory=lambda: [0.5, 1.0, 2.0])
  anchor_size: float = 4.0


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  focal_loss_alpha: float = 0.25
  focal_loss_gamma: float = 1.5
  huber_loss_delta: float = 0.1
  box_loss_weight: int = 50
  l2_weight_decay: float = 0.0


@dataclasses.dataclass
class AttributeHead(hyperparams.Config):
  name: str = ''
  type: str = 'regression'
  size: int = 1


@dataclasses.dataclass
class RetinaNetHead(hyperparams.Config):
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  attribute_heads: List[AttributeHead] = dataclasses.field(default_factory=list)


@dataclasses.dataclass
class DetectionGenerator(hyperparams.Config):
  apply_nms: bool = True
  pre_nms_top_k: int = 5000
  pre_nms_score_threshold: float = 0.05
  nms_iou_threshold: float = 0.5
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`.
  use_cpu_nms: bool = False
  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


@dataclasses.dataclass
class RetinaNet(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 7
  anchor: Anchor = Anchor()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(
      type='fpn', fpn=decoders.FPN())
  head: RetinaNetHead = RetinaNetHead()
  detection_generator: DetectionGenerator = DetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation()


@dataclasses.dataclass
class ExportConfig(hyperparams.Config):
  output_normalized_coordinates: bool = False
  cast_num_detections_to_float: bool = False
  cast_detection_classes_to_float: bool = False


@dataclasses.dataclass
class RetinaNetTask(cfg.TaskConfig):
  model: RetinaNet = RetinaNet()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  annotation_file: Optional[str] = None
  per_category_metrics: bool = False
  export_config: ExportConfig = ExportConfig()
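

# Example (a sketch, not used by any factory in this file): an extra per-box
# regression output can be attached to the head via `attribute_heads`, e.g.
#
#   task = RetinaNetTask()
#   task.model.head.attribute_heads = [
#       AttributeHead(name='depth', type='regression', size=1)]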
@exp_factory.register_config_factory('retinanet')
def retinanet() -> cfg.ExperimentConfig:
  """RetinaNet general config."""
  return cfg.ExperimentConfig(
      task=RetinaNetTask(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000


@exp_factory.register_config_factory('retinanet_resnetfpn_coco')
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=RetinaNetTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              num_classes=91,
              input_size=[640, 640, 3],
              norm_activation=common.NormActivation(use_sync_bn=False),
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.2)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=72 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          57 * steps_per_epoch, 67 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('retinanet_spinenet_coco')
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 640
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=500 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          475 * steps_per_epoch, 490 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config


@exp_factory.register_config_factory('retinanet_mobile_coco')
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
  """COCO object detection with mobile RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 384
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet_mobile',
                  spinenet_mobile=backbones.SpineNetMobile(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7,
                      use_keras_upsampling_2d=False)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              head=RetinaNetHead(num_filters=48, use_separable_conv=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=3e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=600 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          575 * steps_per_epoch, 590 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision import beta
from official.vision.beta.configs import retinanet as exp_cfg
class RetinaNetConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('retinanet_resnetfpn_coco',),
      ('retinanet_spinenet_coco',),
      ('retinanet_mobile_coco',),
  )
  def test_retinanet_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.RetinaNetTask)
    self.assertIsInstance(config.task.model, exp_cfg.RetinaNet)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()