Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
0225b135
Unverified
Commit
0225b135
authored
Mar 05, 2022
by
Srihari Humbarwadi
Committed by
GitHub
Mar 05, 2022
Browse files
Merge branch 'tensorflow:master' into panoptic-deeplab-modeling
parents
7479dbb8
4c571a3c
Changes
332
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1703 additions
and
0 deletions
+1703
-0
official/vision/configs/experiments/retinanet/coco_mobiledetcpu_tpu.yaml
.../configs/experiments/retinanet/coco_mobiledetcpu_tpu.yaml
+62
-0
official/vision/configs/experiments/retinanet/coco_mobilenetv2_tpu.yaml
...n/configs/experiments/retinanet/coco_mobilenetv2_tpu.yaml
+62
-0
official/vision/configs/experiments/retinanet/coco_spinenet143_tpu.yaml
...n/configs/experiments/retinanet/coco_spinenet143_tpu.yaml
+58
-0
official/vision/configs/experiments/retinanet/coco_spinenet190_tpu.yaml
...n/configs/experiments/retinanet/coco_spinenet190_tpu.yaml
+57
-0
official/vision/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml
...igs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml
+60
-0
official/vision/configs/experiments/retinanet/coco_spinenet49_tpu.yaml
...on/configs/experiments/retinanet/coco_spinenet49_tpu.yaml
+58
-0
official/vision/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml
...gs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml
+60
-0
official/vision/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml
...s/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml
+60
-0
official/vision/configs/experiments/retinanet/coco_spinenet96_tpu.yaml
...on/configs/experiments/retinanet/coco_spinenet96_tpu.yaml
+58
-0
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml
...figs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml
+34
-0
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml
...eriments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml
+7
-0
official/vision/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml
...entation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml
+78
-0
official/vision/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml
...xperiments/video_classification/k400_3d-resnet50_tpu.yaml
+88
-0
official/vision/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml
...eriments/video_classification/k400_resnet3drs_50_tpu.yaml
+99
-0
official/vision/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml
...periments/video_classification/k400_slowonly16x4_tpu.yaml
+88
-0
official/vision/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml
...xperiments/video_classification/k400_slowonly8x8_tpu.yaml
+88
-0
official/vision/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml
...xperiments/video_classification/k600_3d-resnet50_tpu.yaml
+88
-0
official/vision/configs/experiments/video_classification/k600_3d-resnet50g_tpu.yaml
...periments/video_classification/k600_3d-resnet50g_tpu.yaml
+112
-0
official/vision/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml
...xperiments/video_classification/k600_slowonly8x8_tpu.yaml
+88
-0
official/vision/configs/image_classification.py
official/vision/configs/image_classification.py
+398
-0
No files found.
official/vision/configs/experiments/retinanet/coco_mobiledetcpu_tpu.yaml
0 → 100644
View file @
0225b135
# --experiment_type=retinanet_mobile_coco
# COCO AP 27.0%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      mobilenet:
        model_id: 'MobileDetCPU'
        filter_size_scale: 1.0
      type: 'mobiledet'
    decoder:
      type: 'fpn'
      fpn:
        num_filters: 128
        use_separable_conv: true
    head:
      num_convs: 4
      num_filters: 128
      use_separable_conv: true
    # Fixed: was "[320 320, 3]" (missing comma between height and width).
    input_size: [320, 320, 3]
    max_level: 6
    min_level: 3
    norm_activation:
      activation: 'relu6'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_mobilenetv2_tpu.yaml
0 → 100644
View file @
0225b135
# --experiment_type=retinanet_mobile_coco
# COCO AP 23.5%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      mobilenet:
        model_id: 'MobileNetV2'
        filter_size_scale: 1.0
      type: 'mobilenet'
    decoder:
      type: 'fpn'
      fpn:
        num_filters: 128
        use_separable_conv: true
    head:
      num_convs: 4
      num_filters: 128
      use_separable_conv: true
    input_size: [256, 256, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'relu6'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet143_tpu.yaml
0 → 100644
View file @
0225b135
# SpineNet-143 COCO detection with protocol C config. Expecting 50.0% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 4
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '143'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1280, 1280, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet190_tpu.yaml
0 → 100644
View file @
0225b135
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 4
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '190'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 7
      num_filters: 512
    input_size: [1280, 1280, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml
0 → 100644
View file @
0225b135
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 48
      use_separable_conv: true
    input_size: [384, 384, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet49_tpu.yaml
0 → 100644
View file @
0225b135
# SpineNet-49 COCO detection with protocol C config. Expecting 44.2% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '49'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [640, 640, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml
0 → 100644
View file @
0225b135
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49S'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 40
      use_separable_conv: true
    input_size: [384, 384, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml
0 → 100644
View file @
0225b135
# --experiment_type=retinanet_mobile_coco
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 3.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet_mobile:
        stochastic_depth_drop_rate: 0.2
        model_id: '49XS'
        se_ratio: 0.2
      type: 'spinenet_mobile'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 24
      use_separable_conv: true
    input_size: [256, 256, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.5
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [263340, 272580]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 277200
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/coco_spinenet96_tpu.yaml
0 → 100644
View file @
0225b135
# SpineNet-96 COCO detection with protocol C config. Expecting 48.5% AP.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        stochastic_depth_drop_rate: 0.2
        model_id: '96'
      type: 'spinenet'
    decoder:
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1024, 1024, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    is_training: true
    parser:
      aug_rand_hflip: true
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    is_training: false
trainer:
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml
0 → 100644
View file @
0225b135
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: ''  # Can't use annotation file when tfds is used.
  losses:
    l2_weight_decay: 0.0001
  model:
    num_classes: 91
    max_level: 7
    min_level: 3
    input_size: [640, 640, 3]
    norm_activation:
      activation: relu
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    tfds_name: 'coco/2017'
    tfds_split: 'train'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 256
    input_path: ''
    is_training: true
    shuffle_buffer_size: 1000
  validation_data:
    tfds_name: 'coco/2017'
    tfds_split: 'validation'
    drop_remainder: true
    dtype: bfloat16
    global_batch_size: 8
    input_path: ''
    is_training: false
official/vision/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml
0 → 100644
View file @
0225b135
# Benchmarks run on the same instance; change eval batch size to fit on a 4x4 TPU.
task:
  validation_data:
    global_batch_size: 32
trainer:
  validation_interval: 1560
  validation_steps: 156
official/vision/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml
0 → 100644
View file @
0225b135
# Use your own cityscapes preprocessed dataset. 79% meanIoU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 19
    input_size: [null, null, 3]
    backbone:
      type: 'dilated_resnet'
      dilated_resnet:
        model_id: 101
        output_stride: 16
        stem_type: 'v1'
        se_ratio: 0.25
        stochastic_depth_drop_rate: 0.2
        multigrid: [1, 2, 4]
        last_stage_repeats: 1
    decoder:
      aspp:
        pool_kernel_size: [512, 1024]
    head:
      feature_fusion: 'deeplabv3plus'
      low_level: 2
      low_level_num_filters: 48
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  losses:
    top_k_percent_pixels: 1.0  # only backpropagate loss for the top-k 100% pixels.
  train_data:
    output_size: [1024, 2048]
    crop_size: [512, 1024]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'train'
    is_training: true
    global_batch_size: 16
    dtype: 'float32'
    aug_rand_hflip: true
    aug_scale_max: 2.0
    aug_scale_min: 0.5
  validation_data:
    output_size: [1024, 2048]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'validation'
    is_training: false
    global_batch_size: 16
    dtype: 'float32'
    drop_remainder: false
    resize_eval_groundtruth: true
trainer:
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 90000
        initial_learning_rate: 0.01
        power: 0.9
      type: polynomial
    optimizer:
      sgd:
        momentum: 0.9
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 925
      type: linear
  steps_per_loop: 185
  summary_interval: 185
  train_steps: 90000
  validation_interval: 185
  validation_steps: 31
  checkpoint_interval: 185
official/vision/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml
0 → 100644
View file @
0225b135
# 3D ResNet-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 77.0% top-1, 93.0% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml
0 → 100644
View file @
0225b135
# 3D ResNet-RS-50 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 78.2% top-1 accuracy.
runtime:
  mixed_precision_dtype: bfloat16
task:
  losses:
    l2_weight_decay: 0.00004
    label_smoothing: 0.1
    one_hot: true
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d_rs:
        model_id: 50
        stem_type: 'v1'
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
        stochastic_depth_drop_rate: 0.1
        se_ratio: 0.25
      type: resnet_3d_rs
    dropout_rate: 0.5
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.0
      use_sync_bn: false
  train_data:
    data_format: channels_last
    drop_remainder: true
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    file_type: sstable
    global_batch_size: 1024
    is_training: true
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 215570
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
    temporal_stride: 2
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    data_format: channels_last
    drop_remainder: false
    dtype: bfloat16
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    file_type: sstable
    global_batch_size: 64
    is_training: false
    min_image_size: 256
    name: kinetics400
    num_channels: 3
    num_classes: 400
    num_examples: 17706
    num_test_clips: 10
    num_test_crops: 3
    one_hot: true
    temporal_stride: 2
trainer:
  checkpoint_interval: 210
  max_to_keep: 3
  optimizer_config:
    ema:
      average_decay: 0.9999
      trainable_weights_only: false
    learning_rate:
      cosine:
        decay_steps: 73682
        initial_learning_rate: 0.8
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1050
      type: linear
  train_steps: 73682
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml
0 → 100644
View file @
0225b135
# SlowOnly 16x4 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 75.6% top-1, 92.1% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 224
    - 224
    - 3
    temporal_stride: 4
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 16
    - 256
    - 256
    - 3
    temporal_stride: 4
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml
0 → 100644
View file @
0225b135
# SlowOnly 8x8 video classification on Kinetics-400.
#
# --experiment_type=video_classification_kinetics400
# Expected accuracy: 74.1% top-1, 91.4% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics400
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 42104
    warmup:
      linear:
        warmup_steps: 1053
  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml
0 → 100644
View file @
0225b135
# 3D ResNet-50 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 79.5% top-1, 94.8% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 224
    - 224
    - 3
    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 32
    - 256
    - 256
    - 3
    temporal_stride: 2
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k600_3d-resnet50g_tpu.yaml
0 → 100644
View file @
0225b135
# 3D ResNet-50g video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 78.7% accuracy, 93.6% top-5.
# Train on TPU: v3-128, eval on TPU: v3-32
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  init_checkpoint: null
  init_checkpoint_modules: all
  losses:
    l2_weight_decay: 0.0001
    label_smoothing: 0.0
  model:
    aggregate_endpoints: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 1
          - 3
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 3
          - 1
          temporal_strides: 1
          use_self_gating: true
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
        stem_pool_temporal_stride: 2
        stem_type: v0
        stochastic_depth_drop_rate: 0.0
      type: resnet_3d
    dropout_rate: 0.2
    model_type: video_classification
    norm_activation:
      activation: relu
      norm_epsilon: 1.0e-05
      norm_momentum: 0.9
      use_sync_bn: false
  train_data:
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.49
    aug_min_aspect_ratio: 0.5
    drop_remainder: true
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 64
    - 224
    - 224
    - 3
    global_batch_size: 1024
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    split: train
  validation_data:
    dtype: 'bfloat16'
    feature_shape: !!python/tuple
    - 250
    - 224
    - 224
    - 3
    global_batch_size: 64
    min_image_size: 256
    name: kinetics600
    num_classes: 600
    num_examples: 27780
    num_test_clips: 1
    num_test_crops: 1
    one_hot: true
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        alpha: 0.0
        decay_steps: 71400
        initial_learning_rate: 1.6
        name: CosineDecay
      type: cosine
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1785
      type: linear
  train_steps: 71400
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml
0 → 100644
View file @
0225b135
# SlowOnly 8x8 video classification on Kinetics-600.
#
# --experiment_type=video_classification_kinetics600
# Expected accuracy: 77.3% top-1, 93.6% top-5.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    dropout_rate: 0.5
    norm_activation:
      use_sync_bn: false
    backbone:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 1
          - 1
          - 1
          - 1
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
          - 3
          - 3
          temporal_strides: 1
          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 1
        stem_conv_temporal_stride: 1
        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 224
    - 224
    - 3
    temporal_stride: 8
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: kinetics600
    feature_shape: !!python/tuple
    - 8
    - 256
    - 256
    - 3
    temporal_stride: 8
    num_test_clips: 10
    num_test_crops: 3
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
        decay_steps: 71488
    warmup:
      linear:
        warmup_steps: 1787
  train_steps: 71488
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
official/vision/configs/image_classification.py
0 → 100644
View file @
0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Image classification configuration definition."""
import
dataclasses
import
os
from
typing
import
List
,
Optional
from
official.core
import
config_definitions
as
cfg
from
official.core
import
exp_factory
from
official.modeling
import
hyperparams
from
official.modeling
import
optimization
from
official.vision.configs
import
common
from
official.vision.configs
import
backbones
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  global_batch_size: int = 0
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  is_multilabel: bool = False
  aug_rand_hflip: bool = True
  aug_type: Optional[
      common.Augmentation] = None  # Choose from AutoAugment and RandAugment.
  color_jitter: float = 0.
  random_erasing: Optional[common.RandomErasing] = None
  file_type: str = 'tfrecord'
  image_field_key: str = 'image/encoded'
  label_field_key: str = 'image/class/label'
  decode_jpeg_only: bool = True
  mixup_and_cutmix: Optional[common.MixupAndCutmix] = None
  # Use default_factory instead of a `common.DataDecoder()` default value:
  # a plain instance default is created once at class-definition time and
  # shared (mutably) by every DataConfig object, so overriding the decoder
  # on one config would silently leak into all others.
  decoder: Optional[common.DataDecoder] = dataclasses.field(
      default_factory=common.DataDecoder)
  # Keep for backward compatibility.
  aug_policy: Optional[str] = None  # None, 'autoaug', or 'randaug'.
  randaug_magnitude: Optional[int] = 10
@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
  """The model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  # default_factory so each config instance owns its own Backbone /
  # NormActivation sub-config; a class-level instance default would be a
  # single mutable object aliased across every ImageClassificationModel.
  backbone: backbones.Backbone = dataclasses.field(
      default_factory=lambda: backbones.Backbone(  # pylint: disable=g-long-lambda
          type='resnet', resnet=backbones.ResNet()))
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = dataclasses.field(
      default_factory=lambda: common.NormActivation(use_sync_bn=False))
  # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'random_uniform'
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss config for image classification.

  Field semantics are defined by the task implementation that consumes this
  config (not visible in this file); descriptions below follow the field
  names and common classification conventions — confirm against the task.
  """
  # Multiplier applied to the classification loss term.
  loss_weight: float = 1.0
  # Whether labels are one-hot encoded for the loss computation.
  one_hot: bool = True
  # Label-smoothing factor; 0.0 disables smoothing.
  label_smoothing: float = 0.0
  # L2 regularization coefficient; 0.0 disables weight decay.
  l2_weight_decay: float = 0.0
  # Whether labels are soft (probability) distributions rather than indices.
  soft_labels: bool = False
@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation config for image classification."""
  # k for the top-k accuracy metric (per the field name; the metric itself
  # is built by the task implementation, not in this file).
  top_k: int = 5
@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
  """The task config."""
  # default_factory so every task gets fresh sub-configs; instance defaults
  # declared at class level are built once and shared (mutably) by all
  # ImageClassificationTask objects, so editing one task's model/data config
  # would bleed into every other task.
  model: ImageClassificationModel = dataclasses.field(
      default_factory=ImageClassificationModel)
  train_data: DataConfig = dataclasses.field(
      default_factory=lambda: DataConfig(is_training=True))
  validation_data: DataConfig = dataclasses.field(
      default_factory=lambda: DataConfig(is_training=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  evaluation: Evaluation = dataclasses.field(default_factory=Evaluation)
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: str = 'all'  # all or backbone
  model_output_keys: Optional[List[int]] = dataclasses.field(
      default_factory=list)
@exp_factory.register_config_factory('image_classification')
def image_classification() -> cfg.ExperimentConfig:
  """Image classification general."""
  # Both data splits must have is_training set explicitly by the user.
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
  ]
  return cfg.ExperimentConfig(
      task=ImageClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=restrictions)
# Number of examples in the ImageNet-2012 (ILSVRC) train split.
IMAGENET_TRAIN_EXAMPLES = 1281167
# Number of examples in the ImageNet-2012 validation split.
IMAGENET_VAL_EXAMPLES = 50000
# Base path of the ImageNet TFRecord shards joined with 'train*'/'valid*'
# globs by the experiment factories below.
IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord'
@exp_factory.register_config_factory('resnet_imagenet')
def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per full pass over the train split.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size

  model = ImageClassificationModel(
      num_classes=1001,
      input_size=[224, 224, 3],
      backbone=backbones.Backbone(
          type='resnet', resnet=backbones.ResNet(model_id=50)),
      norm_activation=common.NormActivation(
          norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False))

  train_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size)
  validation_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
      is_training=False,
      global_batch_size=eval_batch_size)

  # Reference LR of 0.1 at batch 256, scaled linearly with the global batch
  # size, dropped 10x at each boundary.
  lr_values = [
      scale * train_batch_size / 256 for scale in (0.1, 0.01, 0.001, 0.0001)
  ]
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'sgd',
          'sgd': {
              'momentum': 0.9
          }
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              # LR steps down at epochs 30, 60 and 80.
              'boundaries': [
                  30 * steps_per_epoch, 60 * steps_per_epoch,
                  80 * steps_per_epoch
              ],
              'values': lr_values,
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              # Linear warmup from 0 over the first 5 epochs.
              'warmup_steps': 5 * steps_per_epoch,
              'warmup_learning_rate': 0
          }
      }
  })

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=90 * steps_per_epoch,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=model,
          losses=Losses(l2_weight_decay=1e-4),
          train_data=train_data,
          validation_data=validation_data),
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
@exp_factory.register_config_factory('resnet_rs_imagenet')
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet-rs.

  ResNet-RS-50 at 160x160: SE blocks, ResNet-D tweaks, RandAugment,
  label smoothing, EMA of weights, and a 350-epoch cosine schedule.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per full pass over the train split.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              # 1001 = 1000 ImageNet classes plus one extra class
              # (presumably background at index 0 — confirm label map).
              num_classes=1001,
              input_size=[160, 160, 3],
              backbone=backbones.Backbone(
                  type='resnet',
                  resnet=backbones.ResNet(
                      # ResNet-RS variants of the ResNet-50 backbone.
                      model_id=50,
                      stem_type='v1',
                      resnetd_shortcut=True,
                      replace_stem_max_pool=True,
                      se_ratio=0.25,
                      stochastic_depth_drop_rate=0.0)),
              dropout_rate=0.25,
              norm_activation=common.NormActivation(
                  norm_momentum=0.0,
                  norm_epsilon=1e-5,
                  use_sync_bn=False,
                  activation='swish')),
          losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              # RandAugment with magnitude 10 on the training split only.
              aug_type=common.Augmentation(
                  type='randaug', randaug=common.RandAugment(magnitude=10))),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=350 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              # Exponential moving average of all (not just trainable)
              # weights for evaluation.
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      # Cosine decay over the entire 350-epoch run.
                      'initial_learning_rate': 1.6,
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      # Linear warmup from 0 over the first 5 epochs.
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('revnet_imagenet')
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
  """Returns a revnet config for image classification on imagenet.

  RevNet-56 at 224x224 with a 90-epoch SGD schedule and stepwise
  learning-rate drops at epochs 30/60/80.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per full pass over the train split.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              # 1001 = 1000 ImageNet classes plus one extra class
              # (presumably background at index 0 — confirm label map).
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='revnet', revnet=backbones.RevNet(model_id=56)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False),
              # Extra BatchNorm before global average pooling in the head.
              add_head_batch_norm=True),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      # LR drops 10x at epochs 30, 60 and 80.
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [0.8, 0.08, 0.008, 0.0008]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      # Linear warmup from 0 over the first 5 epochs.
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('mobilenet_imagenet')
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with mobilenet.

  MobileNetV2 (width 1.0) at 224x224 with a 500-epoch RMSProp schedule and
  staircase exponential learning-rate decay.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  # Optimizer steps per full pass over the train split.
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              # 1001 = 1000 ImageNet classes plus one extra class
              # (presumably background at index 0 — confirm label map).
              num_classes=1001,
              dropout_rate=0.2,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(
                      model_id='MobileNetV2', filter_size_scale=1.0)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'rmsprop',
                  'rmsprop': {
                      'rho': 0.9,
                      'momentum': 0.9,
                      'epsilon': 0.002,
                  }
              },
              'learning_rate': {
                  'type': 'exponential',
                  'exponential': {
                      # Reference LR of 0.008 at batch 128, scaled linearly
                      # with the global batch size.
                      'initial_learning_rate':
                          0.008 * (train_batch_size // 128),
                      # Staircase decay: multiply by 0.98 every 2.5 epochs.
                      'decay_steps':
                          int(2.5 * steps_per_epoch),
                      'decay_rate':
                          0.98,
                      'staircase':
                          True
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      # Linear warmup from 0 over the first 5 epochs.
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              },
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
Prev
1
…
3
4
5
6
7
8
9
10
11
…
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment