ModelZoo / ResNet50_tensorflow / Commits

Commit c44482ab, authored Mar 01, 2022 by A. Unique TensorFlower

    Internal change

    PiperOrigin-RevId: 431756117

parent 10ee28dd

Changes: 235 files in the full commit; showing 20 changed files with 3162 additions and 0 deletions (+3162 -0).
official/vision/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml (+60 -0)
official/vision/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml (+60 -0)
official/vision/configs/experiments/retinanet/coco_spinenet96_tpu.yaml (+58 -0)
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml (+34 -0)
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml (+7 -0)
official/vision/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml (+78 -0)
official/vision/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml (+88 -0)
official/vision/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml (+99 -0)
official/vision/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml (+88 -0)
official/vision/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml (+88 -0)
official/vision/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml (+88 -0)
official/vision/configs/experiments/video_classification/k600_3d-resnet50g_tpu.yaml (+112 -0)
official/vision/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml (+88 -0)
official/vision/configs/image_classification.py (+398 -0)
official/vision/configs/image_classification_test.py (+49 -0)
official/vision/configs/maskrcnn.py (+523 -0)
official/vision/configs/maskrcnn_test.py (+47 -0)
official/vision/configs/retinanet.py (+438 -0)
official/vision/configs/retinanet_test.py (+46 -0)
official/vision/configs/semantic_segmentation.py (+713 -0)
official/vision/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml (new file, mode 100644)

# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49S'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 40
      use_separable_conv: true
    input_size: [384, 384, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
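The `--experiment_type` comment on the first line names the registered experiment this file overrides; the YAML itself carries only the deltas from that experiment's registered defaults. A minimal sketch of the merge, assuming a TF Model Garden checkout is importable; `hyperparams.override_params_dict` is the same helper the Garden's train driver applies to `--config_file` arguments:

# Sketch: merge the YAML above onto the experiment it names. Assumes the
# tensorflow-models repo (the `official` package) is on PYTHONPATH.
from official import vision  # pylint: disable=unused-import  # registers experiments
from official.core import exp_factory
from official.modeling import hyperparams

config = exp_factory.get_exp_config('retinanet_mobile_coco')
config = hyperparams.override_params_dict(
    config, 'coco_spinenet49s_mobile_tpu.yaml', is_strict=True)
config.validate()
print(config.task.model.backbone.spinenet_mobile.model_id)  # '49S'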
official/vision/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml (new file, mode 100644)

# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49XS'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 24
      use_separable_conv: true
    input_size: [256, 256, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet96_tpu.yaml (new file, mode 100644)

# SpineNet-96 COCO detection with protocol C config. Expecting 48.5% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '96'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1024, 1024, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
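A sanity check on these step counts: with COCO's 118,287 training images (the `COCO_TRAIN_EXAMPLES` constant in `retinanet.py` later in this commit) and the global batch of 256, one epoch is about 462 steps, which is exactly the `steps_per_loop`/`checkpoint_interval` used here. The schedule then works out to roughly 500 epochs with 10x LR drops near epochs 475 and 490:

# Sketch: translate the trainer's step counts into epochs for this config.
COCO_TRAIN_EXAMPLES = 118287  # from retinanet.py in this same commit
batch = 256
steps_per_epoch = COCO_TRAIN_EXAMPLES // batch            # 462
print(231000 / steps_per_epoch)                           # 500.0 epochs total
print([b / steps_per_epoch for b in (219450, 226380)])    # drops at epochs 475, 490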
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml (new file, mode 100644)

runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: ''  # Can't use annotation file when tfds is used.
  losses:
    l2_weight_decay: 0.0001
  model:
    num_classes: 91
    max_level: 7
    min_level: 3
    input_size: [640, 640, 3]
    norm_activation:
      activation: relu
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    tfds_name: 'coco/2017'
    tfds_split: 'train'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 256
    input_path: ''
    is_training: true
    shuffle_buffer_size: 1000
  validation_data:
    tfds_name: 'coco/2017'
    tfds_split: 'validation'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 8
    input_path: ''
    is_training: false
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml (new file, mode 100644)

# Benchmarks run on the same instance; change eval batch size to fit on a 4x4 TPU.
task:
  validation_data:
    global_batch_size: 32
trainer:
  validation_interval: 1560
  validation_steps: 156
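This file is intentionally partial: it only overrides the eval batch size and validation cadence, and is meant to be layered on top of a full config such as the `resnet50fpn_coco_tfds_tpu.yaml` above. A hedged sketch of how such overrides stack; as far as I can tell the Garden's train driver accepts repeated `--config_file` flags and applies them in order, which is equivalent to this loop:

# Sketch: apply a base config file, then the benchmark override, in order.
from official import vision  # pylint: disable=unused-import
from official.core import exp_factory
from official.modeling import hyperparams

config = exp_factory.get_exp_config('retinanet_resnetfpn_coco')
for f in ('resnet50fpn_coco_tfds_tpu.yaml',
          'resnet50fpn_coco_tpu4x4_benchmark.yaml'):
  config = hyperparams.override_params_dict(config, f, is_strict=True)
print(config.task.validation_data.global_batch_size)  # 32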
official/vision/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml (new file, mode 100644)

# Use your own cityscapes preprocessed dataset. 79% meanIoU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 19
    input_size: [null, null, 3]
    backbone:
      type: 'dilated_resnet'
      dilated_resnet:
        model_id: 101
        output_stride: 16
        stem_type: 'v1'
        se_ratio: 0.25
        stochastic_depth_drop_rate: 0.2
        multigrid: [1, 2, 4]
        last_stage_repeats: 1
    decoder:
      aspp:
        pool_kernel_size: [512, 1024]
    head:
      feature_fusion: 'deeplabv3plus'
      low_level: 2
      low_level_num_filters: 48
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  losses:
    top_k_percent_pixels: 1.0  # only backpropagate loss for the top-k (here 100%) of pixels.
  train_data:
    output_size: [1024, 2048]
    crop_size: [512, 1024]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'train'
    is_training: true
    global_batch_size: 16
    dtype: 'float32'
    aug_rand_hflip: true
    aug_scale_max: 2.0
    aug_scale_min: 0.5
  validation_data:
    output_size: [1024, 2048]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'validation'
    is_training: false
    global_batch_size: 16
    dtype: 'float32'
    drop_remainder: false
    resize_eval_groundtruth: true
trainer:
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 90000
        initial_learning_rate: 0.01
        power: 0.9
      type: polynomial
    optimizer:
      sgd:
        momentum: 0.9
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 925
      type: linear
  steps_per_loop: 185
  summary_interval: 185
  train_steps: 90000
  validation_interval: 185
  validation_steps: 31
  checkpoint_interval: 185
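The polynomial schedule above is the standard DeepLab "poly" rule, lr(t) = initial * (1 - t/decay_steps)^power, here with power 0.9 over 90,000 steps after a 925-step linear warmup. A minimal sketch using the Keras built-in schedule, assuming it matches the Garden's `polynomial` type with end LR 0:

import tensorflow as tf

# Sketch: the 'poly' learning-rate rule from the trainer block above.
sched = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.01, decay_steps=90000,
    end_learning_rate=0.0, power=0.9)
for step in (0, 45000, 90000):
  print(step, float(sched(step)))  # 0.01, ~0.0054, 0.0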
official/vision/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml (new file, mode 100644)

# 3D ResNet-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 77.0% top-1, 93.0% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
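A note on the `!!python/tuple` tags used throughout these video configs: `yaml.safe_load` rejects Python-specific tags, so these files are loaded by the Garden's own config machinery rather than a safe loader. If you inspect one by hand, a sketch like the following works; `yaml.UnsafeLoader` constructs the tuples (only use it on files you trust), and `FullLoader` should handle this tag as well:

import yaml

# Sketch: parse a Model Garden video config that uses !!python/tuple tags.
# yaml.safe_load would raise a ConstructorError on these tags.
doc = """
feature_shape: !!python/tuple
- 32
- 224
- 224
- 3
"""
parsed = yaml.load(doc, Loader=yaml.UnsafeLoader)
print(parsed['feature_shape'])  # (32, 224, 224, 3)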
official/vision/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml (new file, mode 100644)

# 3D ResNet-RS-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 78.2% top-1.
runtime:
  mixed_precision_dtype: bfloat16
task:
  losses:
    l2_weight_decay: 0.00004
    label_smoothing: 0.1
    one_hot: true
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d_rs:
        model_id: 50
        stem_type: 'v1'
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
        stochastic_depth_drop_rate: 0.1
        se_ratio: 0.25
      type: resnet_3d_rs
    dropout_rate: 0.5
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.0
      use_sync_bn: false
  train_data:
    data_format: channels_last
    drop_remainder: true
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    file_type: sstable
    global_batch_size: 1024
    is_training: true
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 215570
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
    temporal_stride: 2
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    data_format: channels_last
    drop_remainder: false
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    file_type: sstable
    global_batch_size: 64
    is_training: false
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 17706
    num_test_clips: 10
    num_test_crops: 3
    one_hot: true
    temporal_stride: 2
trainer:
  checkpoint_interval: 210
  max_to_keep: 3
  optimizer_config:
    ema:
      average_decay: 0.9999
      trainable_weights_only: false
    learning_rate:
      cosine:
        decay_steps: 73682
        initial_learning_rate: 0.8
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1050
      type: linear
  train_steps: 73682
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml (new file, mode 100644)

# SlowOnly 16x4 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 75.6% top-1, 92.1% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 224
    - 224
    - 3
    temporal_stride: 4
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 256
    - 256
    - 3
    temporal_stride: 4
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml (new file, mode 100644)

# SlowOnly 8x8 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 74.1% top-1, 91.4% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml (new file, mode 100644)

# 3D ResNet-50 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 79.5% top-1, 94.8% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k600_3d-resnet50g_tpu.yaml (new file, mode 100644)

# 3D ResNet-50g video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 78.7% top-1, 93.6% top-5.
# Train on TPU: v3-128, eval on TPU: v3-32
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  init_checkpoint: null
  init_checkpoint_modules: all
  losses:
    l2_weight_decay: 0.0001
    label_smoothing: 0.0
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 2
        stem_type: v0
        stochastic_depth_drop_rate: 0.0
      type: resnet_3d
    dropout_rate: 0.2
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.9
      use_sync_bn: false
  train_data:
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.49
    aug_min_aspect_ratio: 0.5
    drop_remainder: true
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 64
    - 224
    - 224
    - 3
    global_batch_size: 1024
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    split: train
  validation_data:
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 250
    - 224
    - 224
    - 3
    global_batch_size: 64
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    num_examples: 27780
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        alpha: 0.0
        decay_steps: 71400
        initial_learning_rate: 1.6
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1785
      type: linear
  train_steps: 71400
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml (new file, mode 100644)

# SlowOnly 8x8 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 77.3% top-1, 93.6% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/image_classification.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Image classification configuration definition."""
import dataclasses
import os
from typing import List, Optional

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.configs import common
from official.vision.configs import backbones


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  is_multilabel: bool = False
  aug_rand_hflip: bool = True
  aug_type: Optional[common.Augmentation] = None  # Choose from AutoAugment and RandAugment.
  color_jitter: float = 0.
  random_erasing: Optional[common.RandomErasing] = None
  file_type: str = 'tfrecord'
  image_field_key: str = 'image/encoded'
  label_field_key: str = 'image/class/label'
  decode_jpeg_only: bool = True
  mixup_and_cutmix: Optional[common.MixupAndCutmix] = None
  decoder: Optional[common.DataDecoder] = common.DataDecoder()

  # Keep for backward compatibility.
  aug_policy: Optional[str] = None  # None, 'autoaug', or 'randaug'.
  randaug_magnitude: Optional[int] = 10


@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
  """The model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'random_uniform'


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  one_hot: bool = True
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0
  soft_labels: bool = False


@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  top_k: int = 5


@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
  """The task config."""
  model: ImageClassificationModel = ImageClassificationModel()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  evaluation: Evaluation = Evaluation()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: str = 'all'  # all or backbone
  model_output_keys: Optional[List[int]] = dataclasses.field(
      default_factory=list)


@exp_factory.register_config_factory('image_classification')
def image_classification() -> cfg.ExperimentConfig:
  """Image classification general."""
  return cfg.ExperimentConfig(
      task=ImageClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


IMAGENET_TRAIN_EXAMPLES = 1281167
IMAGENET_VAL_EXAMPLES = 50000
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'


@exp_factory.register_config_factory('resnet_imagenet')
def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [
                          0.1 * train_batch_size / 256,
                          0.01 * train_batch_size / 256,
                          0.001 * train_batch_size / 256,
                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('resnet_rs_imagenet')
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet-rs."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[160, 160, 3],
              backbone=backbones.Backbone(
                  type='resnet',
                  resnet=backbones.ResNet(
                      model_id=50,
                      stem_type='v1',
                      resnetd_shortcut=True,
                      replace_stem_max_pool=True,
                      se_ratio=0.25,
                      stochastic_depth_drop_rate=0.0)),
              dropout_rate=0.25,
              norm_activation=common.NormActivation(
                  norm_momentum=0.0,
                  norm_epsilon=1e-5,
                  use_sync_bn=False,
                  activation='swish')),
          losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_type=common.Augmentation(
                  type='randaug',
                  randaug=common.RandAugment(magnitude=10))),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=350 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('revnet_imagenet')
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
  """Returns a revnet config for image classification on imagenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='revnet', revnet=backbones.RevNet(model_id=56)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False),
              add_head_batch_norm=True),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [0.8, 0.08, 0.008, 0.0008]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with mobilenet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              dropout_rate=0.2,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', filter_size_scale=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.002,
                  }
              },
              'learning_rate': {
                  'type': 'exponential',
                  'exponential': {
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      'decay_steps':
                          int(2.5 * steps_per_epoch),
                      'decay_rate':
                          0.98,
                      'staircase':
                          True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
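One detail worth calling out in `image_classification_imagenet()`: the stepwise values are written as `0.1 * train_batch_size / 256`, i.e. the linear LR-scaling rule, so the batch-4096 config starts at 0.1 * 4096 / 256 = 1.6. A short usage sketch, assuming the `official` package is importable; `resnet_imagenet` is registered by the file above, and the attribute path mirrors the oneof layout of the `OptimizationConfig` dict:

from official import vision  # pylint: disable=unused-import
from official.core import exp_factory

config = exp_factory.get_exp_config('resnet_imagenet')
lr0 = config.trainer.optimizer_config.learning_rate.stepwise.values[0]
print(lr0)  # 1.6 == 0.1 * 4096 / 256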
official/vision/configs/image_classification_test.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Tests for image_classification."""

# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official import vision
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.configs import image_classification as exp_cfg


class ImageClassificationConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('resnet_imagenet',),
      ('resnet_rs_imagenet',),
      ('revnet_imagenet',),
      ('mobilenet_imagenet'),
  )
  def test_image_classification_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.ImageClassificationTask)
    self.assertIsInstance(config.task.model, exp_cfg.ImageClassificationModel)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaises(KeyError):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
official/vision/configs/maskrcnn.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""R-CNN(-RS) configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.configs import common
from official.vision.configs import decoders
from official.vision.configs import backbones


# pylint: disable=missing-class-docstring
@dataclasses.dataclass
class Parser(hyperparams.Config):
  num_channels: int = 3
  match_threshold: float = 0.5
  unmatched_threshold: float = 0.5
  aug_rand_hflip: bool = False
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  skip_crowd_during_training: bool = True
  max_num_instances: int = 100
  rpn_match_threshold: float = 0.7
  rpn_unmatched_threshold: float = 0.3
  rpn_batch_size_per_im: int = 256
  rpn_fg_fraction: float = 0.5
  mask_crop_size: int = 112


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = False
  dtype: str = 'bfloat16'
  decoder: common.DataDecoder = common.DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  file_type: str = 'tfrecord'
  drop_remainder: bool = True
  # Number of examples in the data set, it's used to create the annotation file.
  num_examples: int = -1


@dataclasses.dataclass
class Anchor(hyperparams.Config):
  num_scales: int = 1
  aspect_ratios: List[float] = dataclasses.field(
      default_factory=lambda: [0.5, 1.0, 2.0])
  anchor_size: float = 8.0


@dataclasses.dataclass
class RPNHead(hyperparams.Config):
  num_convs: int = 1
  num_filters: int = 256
  use_separable_conv: bool = False


@dataclasses.dataclass
class DetectionHead(hyperparams.Config):
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  num_fcs: int = 1
  fc_dims: int = 1024
  class_agnostic_bbox_pred: bool = False  # Has to be True for Cascade RCNN.
  # If additional IoUs are passed in 'cascade_iou_thresholds'
  # then ensemble the class probabilities from all heads.
  cascade_class_ensemble: bool = False


@dataclasses.dataclass
class ROIGenerator(hyperparams.Config):
  pre_nms_top_k: int = 2000
  pre_nms_score_threshold: float = 0.0
  pre_nms_min_size_threshold: float = 0.0
  nms_iou_threshold: float = 0.7
  num_proposals: int = 1000
  test_pre_nms_top_k: int = 1000
  test_pre_nms_score_threshold: float = 0.0
  test_pre_nms_min_size_threshold: float = 0.0
  test_nms_iou_threshold: float = 0.7
  test_num_proposals: int = 1000
  use_batched_nms: bool = False


@dataclasses.dataclass
class ROISampler(hyperparams.Config):
  mix_gt_boxes: bool = True
  num_sampled_rois: int = 512
  foreground_fraction: float = 0.25
  foreground_iou_threshold: float = 0.5
  background_iou_high_threshold: float = 0.5
  background_iou_low_threshold: float = 0.0
  # IoU thresholds for additional FRCNN heads in Cascade mode.
  # `foreground_iou_threshold` is the first threshold.
  cascade_iou_thresholds: Optional[List[float]] = None


@dataclasses.dataclass
class ROIAligner(hyperparams.Config):
  crop_size: int = 7
  sample_offset: float = 0.5


@dataclasses.dataclass
class DetectionGenerator(hyperparams.Config):
  apply_nms: bool = True
  pre_nms_top_k: int = 5000
  pre_nms_score_threshold: float = 0.05
  nms_iou_threshold: float = 0.5
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`
  use_cpu_nms: bool = False
  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.


@dataclasses.dataclass
class MaskHead(hyperparams.Config):
  upsample_factor: int = 2
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  class_agnostic: bool = False


@dataclasses.dataclass
class MaskSampler(hyperparams.Config):
  num_sampled_masks: int = 128


@dataclasses.dataclass
class MaskROIAligner(hyperparams.Config):
  crop_size: int = 14
  sample_offset: float = 0.5


@dataclasses.dataclass
class MaskRCNN(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 2
  max_level: int = 6
  anchor: Anchor = Anchor()
  include_mask: bool = True
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  rpn_head: RPNHead = RPNHead()
  detection_head: DetectionHead = DetectionHead()
  roi_generator: ROIGenerator = ROIGenerator()
  roi_sampler: ROISampler = ROISampler()
  roi_aligner: ROIAligner = ROIAligner()
  detection_generator: DetectionGenerator = DetectionGenerator()
  mask_head: Optional[MaskHead] = MaskHead()
  mask_sampler: Optional[MaskSampler] = MaskSampler()
  mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.997, norm_epsilon=0.0001, use_sync_bn=True)


@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  rpn_huber_loss_delta: float = 1. / 9.
  frcnn_huber_loss_delta: float = 1.
  l2_weight_decay: float = 0.0
  rpn_score_weight: float = 1.0
  rpn_box_weight: float = 1.0
  frcnn_class_weight: float = 1.0
  frcnn_box_weight: float = 1.0
  mask_weight: float = 1.0


@dataclasses.dataclass
class MaskRCNNTask(cfg.TaskConfig):
  model: MaskRCNN = MaskRCNN()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(
      is_training=False, drop_remainder=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[str, List[str]] = 'all'  # all, backbone, and/or decoder
  annotation_file: Optional[str] = None
  per_category_metrics: bool = False
  # If set, we only use masks for the specified class IDs.
  allowed_mask_class_ids: Optional[List[int]] = None
  # If set, the COCO metrics will be computed.
  use_coco_metrics: bool = True
  # If set, the Waymo Open Dataset evaluator would be used.
  use_wod_metrics: bool = False


COCO_INPUT_PATH_BASE = 'coco'


@exp_factory.register_config_factory('fasterrcnn_resnetfpn_coco')
def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Faster R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              include_mask=False,
              mask_head=None,
              mask_sampler=None,
              mask_roi_aligner=None),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('maskrcnn_resnetfpn_coco')
def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config


@exp_factory.register_config_factory('maskrcnn_spinenet_coco')
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(use_sync_bn=True),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 320, steps_per_epoch * 340
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config


@exp_factory.register_config_factory('cascadercnn_spinenet_coco')
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
              detection_head=DetectionHead(
                  class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 475, steps_per_epoch * 490
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config
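The four factories above differ mainly in which sub-configs they override: the Faster R-CNN variant is `MaskRCNNTask` with the mask branch nulled out, and the Cascade variant is the same task with multi-threshold ROI sampling plus a class-agnostic, ensembled detection head. A quick sketch to confirm this from the registered configs, assuming the `official` package is importable:

from official import vision  # pylint: disable=unused-import
from official.core import exp_factory

cascade = exp_factory.get_exp_config('cascadercnn_spinenet_coco')
print(cascade.task.model.roi_sampler.cascade_iou_thresholds)     # [0.6, 0.7]
print(cascade.task.model.detection_head.cascade_class_ensemble)  # True

frcnn = exp_factory.get_exp_config('fasterrcnn_resnetfpn_coco')
print(frcnn.task.model.include_mask, frcnn.task.model.mask_head)  # False None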
official/vision/configs/maskrcnn_test.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for maskrcnn."""

# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official import vision
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.configs import maskrcnn as exp_cfg


class MaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('fasterrcnn_resnetfpn_coco',),
      ('maskrcnn_resnetfpn_coco',),
      ('maskrcnn_spinenet_coco',),
      ('cascadercnn_spinenet_coco',),
  )
  def test_maskrcnn_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.MaskRCNNTask)
    self.assertIsInstance(config.task.model, exp_cfg.MaskRCNN)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
official/vision/configs/retinanet.py
0 → 100644
View file @
c44482ab
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""RetinaNet configuration definition."""
import
dataclasses
import
os
from
typing
import
List
,
Optional
,
Union
from
official.core
import
config_definitions
as
cfg
from
official.core
import
exp_factory
from
official.modeling
import
hyperparams
from
official.modeling
import
optimization
from
official.vision.configs
import
common
from
official.vision.configs
import
decoders
from
official.vision.configs
import
backbones
# pylint: disable=missing-class-docstring
# Keep for backward compatibility.
@
dataclasses
.
dataclass
class
TfExampleDecoder
(
common
.
TfExampleDecoder
):
"""A simple TF Example decoder config."""
# Keep for backward compatibility.
@
dataclasses
.
dataclass
class
TfExampleDecoderLabelMap
(
common
.
TfExampleDecoderLabelMap
):
"""TF Example decoder with label map config."""
# Keep for backward compatibility.
@
dataclasses
.
dataclass
class
DataDecoder
(
common
.
DataDecoder
):
"""Data decoder config."""
@
dataclasses
.
dataclass
class
Parser
(
hyperparams
.
Config
):
num_channels
:
int
=
3
match_threshold
:
float
=
0.5
unmatched_threshold
:
float
=
0.5
aug_rand_hflip
:
bool
=
False
aug_scale_min
:
float
=
1.0
aug_scale_max
:
float
=
1.0
skip_crowd_during_training
:
bool
=
True
max_num_instances
:
int
=
100
# Can choose AutoAugment and RandAugment.
aug_type
:
Optional
[
common
.
Augmentation
]
=
None
# Keep for backward compatibility. Not used.
aug_policy
:
Optional
[
str
]
=
None
@
dataclasses
.
dataclass
class
DataConfig
(
cfg
.
DataConfig
):
"""Input config for training."""
input_path
:
str
=
''
global_batch_size
:
int
=
0
is_training
:
bool
=
False
dtype
:
str
=
'bfloat16'
decoder
:
common
.
DataDecoder
=
common
.
DataDecoder
()
parser
:
Parser
=
Parser
()
shuffle_buffer_size
:
int
=
10000
file_type
:
str
=
'tfrecord'
@dataclasses.dataclass
class Anchor(hyperparams.Config):
  num_scales: int = 3
  aspect_ratios: List[float] = dataclasses.field(
      default_factory=lambda: [0.5, 1.0, 2.0])
  anchor_size: float = 4.0
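# Illustrative note (assuming the standard RetinaNet anchor scheme): with the
# defaults above, every feature-map location carries
# num_scales * len(aspect_ratios) = 3 * 3 = 9 anchors, and the base anchor
# edge at pyramid level l is anchor_size * 2**l pixels, i.e. roughly 32px at
# level 3 up to 512px at level 7 for anchor_size = 4.0.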
@dataclasses.dataclass
class Losses(hyperparams.Config):
  loss_weight: float = 1.0
  focal_loss_alpha: float = 0.25
  focal_loss_gamma: float = 1.5
  huber_loss_delta: float = 0.1
  box_loss_weight: int = 50
  l2_weight_decay: float = 0.0
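# Illustrative note (standard focal/Huber formulation, assumed rather than
# quoted from the loss code): classification uses
#   FL(p_t) = -focal_loss_alpha * (1 - p_t)**focal_loss_gamma * log(p_t)
# with alpha = 0.25 and gamma = 1.5, while box regression uses a Huber loss
# with delta = huber_loss_delta = 0.1, scaled by box_loss_weight = 50.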
@dataclasses.dataclass
class AttributeHead(hyperparams.Config):
  name: str = ''
  type: str = 'regression'
  size: int = 1

@dataclasses.dataclass
class RetinaNetHead(hyperparams.Config):
  num_convs: int = 4
  num_filters: int = 256
  use_separable_conv: bool = False
  attribute_heads: List[AttributeHead] = dataclasses.field(
      default_factory=list)

@dataclasses.dataclass
class DetectionGenerator(hyperparams.Config):
  apply_nms: bool = True
  pre_nms_top_k: int = 5000
  pre_nms_score_threshold: float = 0.05
  nms_iou_threshold: float = 0.5
  max_num_detections: int = 100
  nms_version: str = 'v2'  # `v2`, `v1`, `batched`, or `tflite`.
  use_cpu_nms: bool = False
  soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.

  # When nms_version = `tflite`, values from tflite_post_processing need to be
  # specified. They are compatible with the input arguments used by the TFLite
  # custom NMS op and override the above parameters.
  tflite_post_processing: common.TFLitePostProcessingConfig = (
      common.TFLitePostProcessingConfig())
  max_detections: int = 200
  max_classes_per_detection: int = 5
  # Regular NMS runs in a multi-class fashion and is slow. Setting it to False
  # uses class-agnostic NMS, which is faster.
  use_regular_nms: bool = False
  nms_score_threshold: float = 0.1
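# Illustrative override (hypothetical values, not part of this file): to route
# post-processing through the TFLite custom NMS op, one would set e.g.
#   generator = DetectionGenerator(nms_version='tflite')
#   generator.tflite_post_processing.max_detections = 100
# after which the tflite_post_processing values take precedence over the plain
# NMS parameters above, as the comment in the class notes.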
@dataclasses.dataclass
class RetinaNet(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 7
  anchor: Anchor = Anchor()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(
      type='fpn', fpn=decoders.FPN())
  head: RetinaNetHead = RetinaNetHead()
  detection_generator: DetectionGenerator = DetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation()

@dataclasses.dataclass
class ExportConfig(hyperparams.Config):
  output_normalized_coordinates: bool = False
  cast_num_detections_to_float: bool = False
  cast_detection_classes_to_float: bool = False

@dataclasses.dataclass
class RetinaNetTask(cfg.TaskConfig):
  model: RetinaNet = RetinaNet()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  annotation_file: Optional[str] = None
  per_category_metrics: bool = False
  export_config: ExportConfig = ExportConfig()
  # If set, the COCO metrics will be computed.
  use_coco_metrics: bool = True
  # If set, the Waymo Open Dataset evaluator will be used.
  use_wod_metrics: bool = False

@exp_factory.register_config_factory('retinanet')
def retinanet() -> cfg.ExperimentConfig:
  """RetinaNet general config."""
  return cfg.ExperimentConfig(
      task=RetinaNetTask(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])


COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000

@exp_factory.register_config_factory('retinanet_resnetfpn_coco')
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=RetinaNetTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              num_classes=91,
              input_size=[640, 640, 3],
              norm_activation=common.NormActivation(use_sync_bn=False),
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.2)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=72 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          57 * steps_per_epoch, 67 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

  return config
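# Worked numbers for the schedule above: steps_per_epoch = 118287 // 256 = 462,
# so train_steps = 72 * 462 = 33264; the stepwise decay fires at 57 * 462 =
# 26334 and 67 * 462 = 30954 steps; and at the default batch size of 256 the
# peak learning rate is 0.32, reached after a 500-step linear warmup.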
@exp_factory.register_config_factory('retinanet_spinenet_coco')
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 640
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=4e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=500 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          475 * steps_per_epoch, 490 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])

  return config
@exp_factory.register_config_factory('retinanet_mobile_coco')
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
  """COCO object detection with mobile RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 384
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet_mobile',
                  spinenet_mobile=backbones.SpineNetMobile(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7,
                      use_keras_upsampling_2d=False)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              head=RetinaNetHead(num_filters=48, use_separable_conv=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=3e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=600 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          575 * steps_per_epoch, 590 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])

  return config
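Together these factories register three COCO RetinaNet experiments ('retinanet_resnetfpn_coco', 'retinanet_spinenet_coco', 'retinanet_mobile_coco'). A minimal usage sketch, assuming the TF Model Garden `official` package is installed; the data paths below are placeholders:

from official.core import exp_factory

config = exp_factory.get_exp_config('retinanet_resnetfpn_coco')
# Point the data configs at real TFRecord shards (placeholder paths).
config.task.train_data.input_path = '/data/coco/train*'
config.task.validation_data.input_path = '/data/coco/val*'
config.validate()  # enforces the is_training restrictions declared above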
official/vision/configs/retinanet_test.py
0 → 100644
View file @ c44482ab
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for retinanet."""
# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official import vision
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.configs import retinanet as exp_cfg


class RetinaNetConfigTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      ('retinanet_resnetfpn_coco',),
      ('retinanet_spinenet_coco',),
      ('retinanet_mobile_coco',),
  )
  def test_retinanet_configs(self, config_name):
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.RetinaNetTask)
    self.assertIsInstance(config.task.model, exp_cfg.RetinaNet)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    config.validate()
    config.task.train_data.is_training = None
    with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
official/vision/configs/semantic_segmentation.py
0 → 100644
View file @ c44482ab
This diff is collapsed.