Commit e4be7e00 authored by Yeqing Li, committed by A. Unique TensorFlower

Removes unneeded content of the beta folder.

PiperOrigin-RevId: 437276665
parent f47405b5
# SpineNet-49 COCO detection with protocol C config. Expecting 44.2% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '49'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [640, 640, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
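#
# A sketch of how a config file like this is typically launched (the experiment
# name is registered in retinanet.py later in this commit; the config_file,
# model_dir, and tpu values are placeholders, not part of this commit):
#
# python3 -m official.vision.beta.train \
#   --experiment=retinanet_spinenet_coco \
#   --config_file=coco_spinenet49_tpu.yaml \
#   --mode=train_and_eval \
#   --model_dir=gs://my-bucket/spinenet49 \
#   --tpu=my-tpu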
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49S'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 40
      use_separable_conv: true
    input_size: [384, 384, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49XS'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 24
      use_separable_conv: true
    input_size: [256, 256, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
# SpineNet-96 COCO detection with protocol C config. Expecting 48.5% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '96'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1024, 1024, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: ''  # Can't use annotation file when tfds is used.
  losses:
    l2_weight_decay: 0.0001
  model:
    num_classes: 91
    max_level: 7
    min_level: 3
    input_size: [640, 640, 3]
    norm_activation:
      activation: relu
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    tfds_name: 'coco/2017'
    tfds_split: 'train'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 256
    input_path: ''
    is_training: true
    shuffle_buffer_size: 1000
  validation_data:
    tfds_name: 'coco/2017'
    tfds_split: 'validation'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 8
    input_path: ''
    is_training: false
# Benchmark runs on the same instance; change the eval batch size to fit on a 4x4 TPU.
task:
  validation_data:
    global_batch_size: 32
trainer:
  validation_interval: 1560
  validation_steps: 156
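#
# A sketch of how a small override file like this is layered on top of a base
# config (assuming the launcher accepts --config_file multiple times, with
# later files overriding earlier ones; all names below are placeholders):
#
# python3 -m official.vision.beta.train \
#   --experiment=retinanet_resnetfpn_coco \
#   --config_file=base_config.yaml \
#   --config_file=benchmark_override.yaml \
#   --mode=train_and_eval \
#   --model_dir=/tmp/retinanet_benchmark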
# Use your own preprocessed Cityscapes dataset. Expecting 79% mean IoU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 19
    input_size: [null, null, 3]
    backbone:
      type: 'dilated_resnet'
      dilated_resnet:
        model_id: 101
        output_stride: 16
        stem_type: 'v1'
        se_ratio: 0.25
        stochastic_depth_drop_rate: 0.2
        multigrid: [1, 2, 4]
        last_stage_repeats: 1
    decoder:
      aspp:
        pool_kernel_size: [512, 1024]
    head:
      feature_fusion: 'deeplabv3plus'
      low_level: 2
      low_level_num_filters: 48
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  losses:
    top_k_percent_pixels: 1.0  # Only backpropagate loss for the top 100% of pixels, i.e., all pixels.
  train_data:
    output_size: [1024, 2048]
    crop_size: [512, 1024]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'train'
    is_training: true
    global_batch_size: 16
    dtype: 'float32'
    aug_rand_hflip: true
    aug_scale_max: 2.0
    aug_scale_min: 0.5
  validation_data:
    output_size: [1024, 2048]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'validation'
    is_training: false
    global_batch_size: 16
    dtype: 'float32'
    drop_remainder: false
    resize_eval_groundtruth: true
trainer:
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 90000
        initial_learning_rate: 0.01
        power: 0.9
      type: polynomial
    optimizer:
      sgd:
        momentum: 0.9
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 925
      type: linear
  steps_per_loop: 185
  summary_interval: 185
  train_steps: 90000
  validation_interval: 185
  validation_steps: 31
  checkpoint_interval: 185
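#
# Schedule sanity check (a sketch; assumes the standard 2,975-image Cityscapes
# training split): 2,975 images / 16 per batch gives about 185 steps per epoch,
# matching steps_per_loop: 185, so train_steps: 90000 is roughly 485 epochs and
# warmup_steps: 925 is 5 epochs.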
# 3D ResNet-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 77.0% top-1, 93.0% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
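#
# Schedule sanity check (a sketch, using the Kinetics-400 example count listed
# in the ResNet-RS config below): 215,570 train examples / 1,024 per batch is
# about 210 steps per epoch, so train_steps: 42104 is roughly 200 epochs and
# warmup_steps: 1053 is roughly 5 epochs.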
# 3D ResNet-RS-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 78.2% top-1.
runtime:
  mixed_precision_dtype: bfloat16
task:
  losses:
    l2_weight_decay: 0.00004
    label_smoothing: 0.1
    one_hot: true
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d_rs:
        model_id: 50
        stem_type: 'v1'
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
        stochastic_depth_drop_rate: 0.1
        se_ratio: 0.25
      type: resnet_3d_rs
    dropout_rate: 0.5
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.0
      use_sync_bn: false
  train_data:
    data_format: channels_last
    drop_remainder: true
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    file_type: sstable
    global_batch_size: 1024
    is_training: true
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 215570
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
    temporal_stride: 2
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    data_format: channels_last
    drop_remainder: false
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    file_type: sstable
    global_batch_size: 64
    is_training: false
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 17706
    num_test_clips: 10
    num_test_crops: 3
    one_hot: true
    temporal_stride: 2
trainer:
  checkpoint_interval: 210
  max_to_keep: 3
  optimizer_config:
    ema:
      average_decay: 0.9999
      trainable_weights_only: false
    learning_rate:
      cosine:
        decay_steps: 73682
        initial_learning_rate: 0.8
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1050
      type: linear
  train_steps: 73682
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# SlowOnly 16x4 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 75.6% top-1, 92.1% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 224
    - 224
    - 3
    temporal_stride: 4
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 256
    - 256
    - 3
    temporal_stride: 4
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# SlowOnly 8x8 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 74.1% top-1, 91.4% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# 3D ResNet-50 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 79.5% top-1, 94.8% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# 3D ResNet-50g video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 78.7% top-1, 93.6% top-5.
# Train on TPU: v3-128, eval on TPU: v3-32.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  init_checkpoint: null
  init_checkpoint_modules: all
  losses:
    l2_weight_decay: 0.0001
    label_smoothing: 0.0
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 2
        stem_type: v0
        stochastic_depth_drop_rate: 0.0
      type: resnet_3d
    dropout_rate: 0.2
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.9
      use_sync_bn: false
  train_data:
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.49
    aug_min_aspect_ratio: 0.5
    drop_remainder: true
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 64
    - 224
    - 224
    - 3
    global_batch_size: 1024
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    split: train
  validation_data:
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 250
    - 224
    - 224
    - 3
    global_batch_size: 64
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    num_examples: 27780
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        alpha: 0.0
        decay_steps: 71400
        initial_learning_rate: 1.6
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1785
      type: linear
  train_steps: 71400
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# SlowOnly 8x8 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 77.3% top-1, 93.6% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image classification configuration definition."""
import dataclasses
import os
from typing import List, Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import backbones
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  is_multilabel: bool = False
  aug_rand_hflip: bool = True
  aug_type: Optional[
      common.Augmentation] = None  # Choose from AutoAugment and RandAugment.
  color_jitter: float = 0.
  random_erasing: Optional[common.RandomErasing] = None
  file_type: str = 'tfrecord'
  image_field_key: str = 'image/encoded'
  label_field_key: str = 'image/class/label'
  decode_jpeg_only: bool = True
  mixup_and_cutmix: Optional[common.MixupAndCutmix] = None
  decoder: Optional[common.DataDecoder] = common.DataDecoder()
  # Keep for backward compatibility.
  aug_policy: Optional[str] = None  # None, 'autoaug', or 'randaug'.
  randaug_magnitude: Optional[int] = 10


@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
  """The model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  # Adds a BatchNormalization layer before GlobalAveragePooling in the
  # classification head.
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'random_uniform'


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  one_hot: bool = True
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0
  soft_labels: bool = False


@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  top_k: int = 5


@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
  """The task config."""
  model: ImageClassificationModel = ImageClassificationModel()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  evaluation: Evaluation = Evaluation()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: str = 'all'  # all or backbone
  model_output_keys: Optional[List[int]] = dataclasses.field(
      default_factory=list)


@exp_factory.register_config_factory('image_classification')
def image_classification() -> cfg.ExperimentConfig:
  """Image classification general."""
  return cfg.ExperimentConfig(
      task=ImageClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


IMAGENET_TRAIN_EXAMPLES = 1281167
IMAGENET_VAL_EXAMPLES = 50000
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'


@exp_factory.register_config_factory('resnet_imagenet')
def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [
                          0.1 * train_batch_size / 256,
                          0.01 * train_batch_size / 256,
                          0.001 * train_batch_size / 256,
                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
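

# Example usage (a sketch; the override value is illustrative only):
#
#   config = exp_factory.get_exp_config('resnet_imagenet')
#   config.task.train_data.global_batch_size = 1024
#   config.validate()  # Re-checks the `restrictions` declared above.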
@exp_factory.register_config_factory('resnet_rs_imagenet')
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet-rs."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[160, 160, 3],
              backbone=backbones.Backbone(
                  type='resnet',
                  resnet=backbones.ResNet(
                      model_id=50,
                      stem_type='v1',
                      resnetd_shortcut=True,
                      replace_stem_max_pool=True,
                      se_ratio=0.25,
                      stochastic_depth_drop_rate=0.0)),
              dropout_rate=0.25,
              norm_activation=common.NormActivation(
                  norm_momentum=0.0,
                  norm_epsilon=1e-5,
                  use_sync_bn=False,
                  activation='swish')),
          losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_type=common.Augmentation(
                  type='randaug', randaug=common.RandAugment(magnitude=10))),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=350 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('revnet_imagenet')
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
  """Returns a revnet config for image classification on imagenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='revnet', revnet=backbones.RevNet(model_id=56)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False),
              add_head_batch_norm=True),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [0.8, 0.08, 0.008, 0.0008]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with mobilenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              dropout_rate=0.2,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', filter_size_scale=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.002,
                  }
              },
              'learning_rate': {
                  'type': 'exponential',
                  'exponential': {
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      'decay_steps':
                          int(2.5 * steps_per_epoch),
                      'decay_rate':
                          0.98,
                      'staircase':
                          True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
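

# Learning-rate sanity check (a sketch): with train_batch_size = 4096, the
# exponential schedule above starts at 0.008 * (4096 // 128) = 0.256 and, with
# staircase=True, decays by 2% every int(2.5 * steps_per_epoch) steps, i.e.
# every 2.5 epochs.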
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for image_classification."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision import beta
from official.vision.beta.configs import image_classification as exp_cfg
class ImageClassificationConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('resnet_imagenet',),
      ('resnet_rs_imagenet',),
      ('revnet_imagenet',),
      ('mobilenet_imagenet',),
  )
  def test_image_classification_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.ImageClassificationTask)
    self.assertIsInstance(config.task.model,
                          exp_cfg.ImageClassificationModel)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaises(KeyError):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""R-CNN(-RS) configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import backbones
# pylint: disable=missing-class-docstring


@dataclasses.dataclass
class Parser(hyperparams.Config):
  num_channels: int = 3
  match_threshold: float = 0.5
  unmatched_threshold: float = 0.5
  aug_rand_hflip: bool = False
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  skip_crowd_during_training: bool = True
  max_num_instances: int = 100
  rpn_match_threshold: float = 0.7
  rpn_unmatched_threshold: float = 0.3
  rpn_batch_size_per_im: int = 256
  rpn_fg_fraction: float = 0.5
  mask_crop_size: int = 112


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = False
  dtype: str = 'bfloat16'
  decoder: common.DataDecoder = common.DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  file_type: str = 'tfrecord'
  drop_remainder: bool = True
  # Number of examples in the dataset; used to create the annotation file.
  num_examples: int = -1


@dataclasses.dataclass
class Anchor(hyperparams.Config):
  num_scales: int = 1
  aspect_ratios: List[float] = dataclasses.field(
      default_factory=lambda: [0.5, 1.0, 2.0])
  anchor_size: float = 8.0


@dataclasses.dataclass
class RPNHead(hyperparams.Config):
  num_convs: int = 1
  num_filters: int = 256
  use_separable_conv: bool = False


@dataclasses.dataclass
class DetectionHead(hyperparams.Config):
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  num_fcs: int = 1
  fc_dims: int = 1024
  class_agnostic_bbox_pred: bool = False  # Has to be True for Cascade RCNN.
  # If additional IoUs are passed in 'cascade_iou_thresholds',
  # then ensemble the class probabilities from all heads.
  cascade_class_ensemble: bool = False


@dataclasses.dataclass
class ROIGenerator(hyperparams.Config):
  pre_nms_top_k: int = 2000
  pre_nms_score_threshold: float = 0.0
  pre_nms_min_size_threshold: float = 0.0
  nms_iou_threshold: float = 0.7
  num_proposals: int = 1000
  test_pre_nms_top_k: int = 1000
  test_pre_nms_score_threshold: float = 0.0
  test_pre_nms_min_size_threshold: float = 0.0
  test_nms_iou_threshold: float = 0.7
  test_num_proposals: int = 1000
  use_batched_nms: bool = False


@dataclasses.dataclass
class ROISampler(hyperparams.Config):
  mix_gt_boxes: bool = True
  num_sampled_rois: int = 512
  foreground_fraction: float = 0.25
  foreground_iou_threshold: float = 0.5
  background_iou_high_threshold: float = 0.5
  background_iou_low_threshold: float = 0.0
  # IoU thresholds for additional FRCNN heads in Cascade mode.
  # `foreground_iou_threshold` is the first threshold.
  cascade_iou_thresholds: Optional[List[float]] = None


@dataclasses.dataclass
class ROIAligner(hyperparams.Config):
  crop_size: int = 7
  sample_offset: float = 0.5


@dataclasses.dataclass
class DetectionGenerator(hyperparams.Config):
  apply_nms: bool = True
  pre_nms_top_k: int = 5000
  pre_nms_score_threshold: float = 0.05
  nms_iou_threshold: float = 0.5
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`
  use_cpu_nms: bool = False
  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


@dataclasses.dataclass
class MaskHead(hyperparams.Config):
  upsample_factor: int = 2
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  class_agnostic: bool = False


@dataclasses.dataclass
class MaskSampler(hyperparams.Config):
  num_sampled_masks: int = 128


@dataclasses.dataclass
class MaskROIAligner(hyperparams.Config):
  crop_size: int = 14
  sample_offset: float = 0.5


@dataclasses.dataclass
class MaskRCNN(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 2
  max_level: int = 6
  anchor: Anchor = Anchor()
  include_mask: bool = True
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(
      type='fpn', fpn=decoders.FPN())
  rpn_head: RPNHead = RPNHead()
  detection_head: DetectionHead = DetectionHead()
  roi_generator: ROIGenerator = ROIGenerator()
  roi_sampler: ROISampler = ROISampler()
  roi_aligner: ROIAligner = ROIAligner()
  detection_generator: DetectionGenerator = DetectionGenerator()
  mask_head: Optional[MaskHead] = MaskHead()
  mask_sampler: Optional[MaskSampler] = MaskSampler()
  mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.997,
      norm_epsilon=0.0001,
      use_sync_bn=True)


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  rpn_huber_loss_delta: float = 1. / 9.
  frcnn_huber_loss_delta: float = 1.
  l2_weight_decay: float = 0.0
  rpn_score_weight: float = 1.0
  rpn_box_weight: float = 1.0
  frcnn_class_weight: float = 1.0
  frcnn_box_weight: float = 1.0
  mask_weight: float = 1.0


@dataclasses.dataclass
class MaskRCNNTask(cfg.TaskConfig):
  model: MaskRCNN = MaskRCNN()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False,
                                           drop_remainder=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  annotation_file: Optional[str] = None
  per_category_metrics: bool = False
  # If set, we only use masks for the specified class IDs.
  allowed_mask_class_ids: Optional[List[int]] = None
  # If set, the COCO metrics will be computed.
  use_coco_metrics: bool = True
  # If set, the Waymo Open Dataset evaluator will be used.
  use_wod_metrics: bool = False


COCO_INPUT_PATH_BASE = 'coco'


@exp_factory.register_config_factory('fasterrcnn_resnetfpn_coco')
def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Faster R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              include_mask=False,
              mask_head=None,
              mask_sampler=None,
              mask_roi_aligner=None),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('maskrcnn_resnetfpn_coco')
def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91, input_size=[1024, 1024, 3], include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('maskrcnn_spinenet_coco')
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(use_sync_bn=True),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 320, steps_per_epoch * 340
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config


@exp_factory.register_config_factory('cascadercnn_spinenet_coco')
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
              detection_head=DetectionHead(
                  class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 475, steps_per_epoch * 490
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config
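

# Example (a sketch) of how the min/max_level restrictions above surface when a
# config is edited inconsistently:
#
#   config = exp_factory.get_exp_config('maskrcnn_spinenet_coco')
#   config.task.model.min_level = 2  # Disagrees with backbone.spinenet.min_level.
#   config.validate()  # Raises KeyError for the violated restriction.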
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for maskrcnn."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision import beta
from official.vision.beta.configs import maskrcnn as exp_cfg
class MaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('fasterrcnn_resnetfpn_coco',),
      ('maskrcnn_resnetfpn_coco',),
      ('maskrcnn_spinenet_coco',),
      ('cascadercnn_spinenet_coco',),
  )
  def test_maskrcnn_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.MaskRCNNTask)
    self.assertIsInstance(config.task.model, exp_cfg.MaskRCNN)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""RetinaNet configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import backbones
# pylint: disable=missing-class-docstring


# Keep for backward compatibility.
@dataclasses.dataclass
class TfExampleDecoder(common.TfExampleDecoder):
  """A simple TF Example decoder config."""


# Keep for backward compatibility.
@dataclasses.dataclass
class TfExampleDecoderLabelMap(common.TfExampleDecoderLabelMap):
  """TF Example decoder with label map config."""


# Keep for backward compatibility.
@dataclasses.dataclass
class DataDecoder(common.DataDecoder):
  """Data decoder config."""


@dataclasses.dataclass
class Parser(hyperparams.Config):
  num_channels: int = 3
  match_threshold: float = 0.5
  unmatched_threshold: float = 0.5
  aug_rand_hflip: bool = False
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  skip_crowd_during_training: bool = True
  max_num_instances: int = 100
  # Can choose AutoAugment and RandAugment.
  aug_type: Optional[common.Augmentation] = None
  # Keep for backward compatibility. Not used.
  aug_policy: Optional[str] = None


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = False
  dtype: str = 'bfloat16'
  decoder: common.DataDecoder = common.DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  file_type: str = 'tfrecord'


@dataclasses.dataclass
class Anchor(hyperparams.Config):
  num_scales: int = 3
  aspect_ratios: List[float] = dataclasses.field(
      default_factory=lambda: [0.5, 1.0, 2.0])
  anchor_size: float = 4.0


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  focal_loss_alpha: float = 0.25
  focal_loss_gamma: float = 1.5
  huber_loss_delta: float = 0.1
  box_loss_weight: int = 50
  l2_weight_decay: float = 0.0


@dataclasses.dataclass
class AttributeHead(hyperparams.Config):
  name: str = ''
  type: str = 'regression'
  size: int = 1


@dataclasses.dataclass
class RetinaNetHead(hyperparams.Config):
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  attribute_heads: List[AttributeHead] = dataclasses.field(default_factory=list)


@dataclasses.dataclass
class DetectionGenerator(hyperparams.Config):
  apply_nms: bool = True
  pre_nms_top_k: int = 5000
  pre_nms_score_threshold: float = 0.05
  nms_iou_threshold: float = 0.5
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`.
  use_cpu_nms: bool = False
  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


@dataclasses.dataclass
class RetinaNet(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 7
  anchor: Anchor = Anchor()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(
      type='fpn', fpn=decoders.FPN())
  head: RetinaNetHead = RetinaNetHead()
  detection_generator: DetectionGenerator = DetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation()


@dataclasses.dataclass
class ExportConfig(hyperparams.Config):
  output_normalized_coordinates: bool = False
  cast_num_detections_to_float: bool = False
  cast_detection_classes_to_float: bool = False


@dataclasses.dataclass
class RetinaNetTask(cfg.TaskConfig):
  model: RetinaNet = RetinaNet()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  annotation_file: Optional[str] = None
  per_category_metrics: bool = False
  export_config: ExportConfig = ExportConfig()
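

# Example (a sketch, not used by any factory in this file): an extra per-box
# regression output can be attached to the head via `attribute_heads`, e.g.
#
#   task = RetinaNetTask()
#   task.model.head.attribute_heads = [
#       AttributeHead(name='depth', type='regression', size=1)]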
@exp_factory.register_config_factory('retinanet')
def retinanet() -> cfg.ExperimentConfig:
  """RetinaNet general config."""
  return cfg.ExperimentConfig(
      task=RetinaNetTask(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000


@exp_factory.register_config_factory('retinanet_resnetfpn_coco')
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=RetinaNetTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              num_classes=91,
              input_size=[640, 640, 3],
              norm_activation=common.NormActivation(use_sync_bn=False),
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.2)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=72 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          57 * steps_per_epoch, 67 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('retinanet_spinenet_coco')
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 640
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=500 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          475 * steps_per_epoch, 490 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config


@exp_factory.register_config_factory('retinanet_mobile_coco')
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
  """COCO object detection with mobile RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 384
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet_mobile',
                  spinenet_mobile=backbones.SpineNetMobile(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7,
                      use_keras_upsampling_2d=False)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              head=RetinaNetHead(num_filters=48, use_separable_conv=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=3e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=600 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          575 * steps_per_epoch, 590 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision import beta
from official.vision.beta.configs import retinanet as exp_cfg
class RetinaNetConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('retinanet_resnetfpn_coco',),
      ('retinanet_spinenet_coco',),
      ('retinanet_mobile_coco',),
  )
  def test_retinanet_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.RetinaNetTask)
    self.assertIsInstance(config.task.model, exp_cfg.RetinaNet)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()