Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
31ca3b97
"model/models/vscode:/vscode.git/clone" did not exist on "ffbe8e076df9e2e67aab016ea3ec64822369b725"
Commit
31ca3b97
authored
Jul 23, 2020
by
Kaushik Shivakumar
Browse files
resolve merge conflicts
parents
3e9d886d
7fcd7cba
Changes
392
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3952 additions
and
1 deletion
+3952
-1
research/object_detection/configs/tf2/ssd_efficientdet_d0_512x512_coco17_tpu-8.config
...nfigs/tf2/ssd_efficientdet_d0_512x512_coco17_tpu-8.config
+199
-0
research/object_detection/configs/tf2/ssd_efficientdet_d1_640x640_coco17_tpu-8.config
...nfigs/tf2/ssd_efficientdet_d1_640x640_coco17_tpu-8.config
+199
-0
research/object_detection/configs/tf2/ssd_efficientdet_d2_768x768_coco17_tpu-8.config
...nfigs/tf2/ssd_efficientdet_d2_768x768_coco17_tpu-8.config
+199
-0
research/object_detection/configs/tf2/ssd_efficientdet_d3_896x896_coco17_tpu-32.config
...figs/tf2/ssd_efficientdet_d3_896x896_coco17_tpu-32.config
+199
-0
research/object_detection/configs/tf2/ssd_efficientdet_d4_1024x1024_coco17_tpu-32.config
...gs/tf2/ssd_efficientdet_d4_1024x1024_coco17_tpu-32.config
+199
-0
research/object_detection/configs/tf2/ssd_efficientdet_d5_1280x1280_coco17_tpu-32.config
...gs/tf2/ssd_efficientdet_d5_1280x1280_coco17_tpu-32.config
+199
-0
research/object_detection/configs/tf2/ssd_efficientdet_d6_1408x1408_coco17_tpu-32.config
...gs/tf2/ssd_efficientdet_d6_1408x1408_coco17_tpu-32.config
+201
-0
research/object_detection/configs/tf2/ssd_efficientdet_d7_1536x1536_coco17_tpu-32.config
...gs/tf2/ssd_efficientdet_d7_1536x1536_coco17_tpu-32.config
+201
-0
research/object_detection/configs/tf2/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.config
...figs/tf2/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config
.../configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config
.../tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config
+201
-0
research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
.../tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
+201
-0
research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.config
...gs/tf2/ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config
...figs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config
...gs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config
...figs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config
...igs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config
+197
-0
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config
...nfigs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config
+197
-0
research/object_detection/core/box_predictor.py
research/object_detection/core/box_predictor.py
+1
-1
research/object_detection/core/densepose_ops.py
research/object_detection/core/densepose_ops.py
+377
-0
No files found.
research/object_detection/configs/tf2/ssd_efficientdet_d0_512x512_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b0 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d0).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b0 checkpoint.
#
# Train on TPU-8
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
512
max_dimension
:
512
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
64
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
3
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b0_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
3
num_filters
:
64
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
512
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d1_640x640_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b1 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d1).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b1 checkpoint.
#
# Train on TPU-8
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
640
max_dimension
:
640
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
88
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
3
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b1_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
4
num_filters
:
88
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
640
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d2_768x768_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b2 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d2).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b2 checkpoint.
#
# Train on TPU-8
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
768
max_dimension
:
768
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
112
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
3
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b2_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
5
num_filters
:
112
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
768
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d3_896x896_coco17_tpu-32.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b3 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d3).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b3 checkpoint.
#
# Train on TPU-32
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
896
max_dimension
:
896
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
160
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b3_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
6
num_filters
:
160
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
896
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d4_1024x1024_coco17_tpu-32.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b4 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d4).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b4 checkpoint.
#
# Train on TPU-32
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
1024
max_dimension
:
1024
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
224
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b4_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
7
num_filters
:
224
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
1024
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d5_1280x1280_coco17_tpu-32.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b5 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d5).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b5 checkpoint.
#
# Train on TPU-32
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
1280
max_dimension
:
1280
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
288
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b5_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
7
num_filters
:
288
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
1280
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d6_1408x1408_coco17_tpu-32.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b6 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d6).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b6 checkpoint.
#
# Train on TPU-32
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
1408
max_dimension
:
1408
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
384
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
5
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b6_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
8
num_filters
:
384
# Use unweighted sum for stability.
combine_method
:
'sum'
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
1408
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_efficientdet_d7_1536x1536_coco17_tpu-32.config
0 → 100644
View file @
31ca3b97
# SSD with EfficientNet-b6 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d7).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b6 checkpoint.
#
# Train on TPU-32
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
add_background_class
:
false
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
3
}
}
image_resizer
{
keep_aspect_ratio_resizer
{
min_dimension
:
1536
max_dimension
:
1536
pad_to_max_dimension
:
true
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
384
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
decay
:
0
.
99
epsilon
:
0
.
001
}
}
num_layers_before_predictor
:
5
kernel_size
:
3
use_depthwise
:
true
}
}
feature_extractor
{
type
:
'ssd_efficientnet-b6_bifpn_keras'
bifpn
{
min_level
:
3
max_level
:
7
num_iterations
:
8
num_filters
:
384
# Use unweighted sum for stability.
combine_method
:
'sum'
}
conv_hyperparams
{
force_use_bias
:
true
activation
:
SWISH
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
99
,
epsilon
:
0
.
001
,
}
}
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
1
.
5
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
5
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
300000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_scale_crop_and_pad_to_square
{
output_size
:
1536
scale_min
:
0
.
1
scale_max
:
2
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
:
8
e
-
2
total_steps
:
300000
warmup_learning_rate
: .
001
warmup_steps
:
2500
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Mobilenet v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 29.1 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
640
width
:
640
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_mobilenet_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/mobilenet_v1.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
num_steps
:
25000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
25000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
batch_size
:
1
;
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Mobilenet v2
# Trained on COCO17, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 22.2 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
ssd_anchor_generator
{
num_layers
:
6
min_scale
:
0
.
2
max_scale
:
0
.
95
aspect_ratios
:
1
.
0
aspect_ratios
:
2
.
0
aspect_ratios
:
0
.
5
aspect_ratios
:
3
.
0
aspect_ratios
:
0
.
3333
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
300
width
:
300
}
}
box_predictor
{
convolutional_box_predictor
{
min_depth
:
0
max_depth
:
0
num_layers_before_predictor
:
0
use_dropout
:
false
dropout_keep_probability
:
0
.
8
kernel_size
:
1
box_code_size
:
4
apply_sigmoid_to_scores
:
false
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
train
:
true
,
scale
:
true
,
center
:
true
,
decay
:
0
.
97
,
epsilon
:
0
.
001
,
}
}
}
}
feature_extractor
{
type
:
'ssd_mobilenet_v2_keras'
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
train
:
true
,
scale
:
true
,
center
:
true
,
decay
:
0
.
97
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
75
,
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
delta
:
1
.
0
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
512
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
num_steps
:
50000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
ssd_random_crop
{
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
8
total_steps
:
50000
warmup_learning_rate
:
0
.
13333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
# predictor and focal loss (a mobile version of Retinanet).
# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 22.2 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
320
width
:
320
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
128
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
share_prediction_tower
:
true
use_depthwise
:
true
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_mobilenet_v2_fpn_keras'
use_depthwise
:
true
fpn
{
min_level
:
3
max_level
:
7
additional_layer_depth
:
128
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
num_steps
:
50000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
08
total_steps
:
50000
warmup_learning_rate
: .
026666
warmup_steps
:
1000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
# predictor and focal loss (a mobile version of Retinanet).
# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 28.2 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
640
width
:
640
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
128
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
share_prediction_tower
:
true
use_depthwise
:
true
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_mobilenet_v2_fpn_keras'
use_depthwise
:
true
fpn
{
min_level
:
3
max_level
:
7
additional_layer_depth
:
128
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
00004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
128
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
num_steps
:
50000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
08
total_steps
:
50000
warmup_learning_rate
: .
026666
warmup_steps
:
1000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 39.5 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
1024
width
:
1024
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_resnet101_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
100000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
100000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 35.4 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
640
width
:
640
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_resnet101_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
25000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
25000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Resnet 152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 39.6 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
1024
width
:
1024
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_resnet152_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
100000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
100000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Resnet 152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 35.6 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
640
width
:
640
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_resnet152_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
25000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
25000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 38.3 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
1024
width
:
1024
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_resnet50_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
100000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
100000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config
0 → 100644
View file @
31ca3b97
# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 34.3 mAP on COCO17 Val
model
{
ssd
{
inplace_batchnorm_update
:
true
freeze_batchnorm
:
false
num_classes
:
90
box_coder
{
faster_rcnn_box_coder
{
y_scale
:
10
.
0
x_scale
:
10
.
0
height_scale
:
5
.
0
width_scale
:
5
.
0
}
}
matcher
{
argmax_matcher
{
matched_threshold
:
0
.
5
unmatched_threshold
:
0
.
5
ignore_thresholds
:
false
negatives_lower_than_unmatched
:
true
force_match_for_each_row
:
true
use_matmul_gather
:
true
}
}
similarity_calculator
{
iou_similarity
{
}
}
encode_background_as_zeros
:
true
anchor_generator
{
multiscale_anchor_generator
{
min_level
:
3
max_level
:
7
anchor_scale
:
4
.
0
aspect_ratios
: [
1
.
0
,
2
.
0
,
0
.
5
]
scales_per_octave
:
2
}
}
image_resizer
{
fixed_shape_resizer
{
height
:
640
width
:
640
}
}
box_predictor
{
weight_shared_convolutional_box_predictor
{
depth
:
256
class_prediction_bias_init
: -
4
.
6
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
random_normal_initializer
{
stddev
:
0
.
01
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
num_layers_before_predictor
:
4
kernel_size
:
3
}
}
feature_extractor
{
type
:
'ssd_resnet50_v1_fpn_keras'
fpn
{
min_level
:
3
max_level
:
7
}
min_depth
:
16
depth_multiplier
:
1
.
0
conv_hyperparams
{
activation
:
RELU_6
,
regularizer
{
l2_regularizer
{
weight
:
0
.
0004
}
}
initializer
{
truncated_normal_initializer
{
stddev
:
0
.
03
mean
:
0
.
0
}
}
batch_norm
{
scale
:
true
,
decay
:
0
.
997
,
epsilon
:
0
.
001
,
}
}
override_base_feature_extractor_hyperparams
:
true
}
loss
{
classification_loss
{
weighted_sigmoid_focal
{
alpha
:
0
.
25
gamma
:
2
.
0
}
}
localization_loss
{
weighted_smooth_l1
{
}
}
classification_weight
:
1
.
0
localization_weight
:
1
.
0
}
normalize_loss_by_num_matches
:
true
normalize_loc_loss_by_codesize
:
true
post_processing
{
batch_non_max_suppression
{
score_threshold
:
1
e
-
8
iou_threshold
:
0
.
6
max_detections_per_class
:
100
max_total_detections
:
100
}
score_converter
:
SIGMOID
}
}
}
train_config
: {
fine_tune_checkpoint_version
:
V2
fine_tune_checkpoint
:
"PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type
:
"classification"
batch_size
:
64
sync_replicas
:
true
startup_delay_steps
:
0
replicas_to_aggregate
:
8
use_bfloat16
:
true
num_steps
:
25000
data_augmentation_options
{
random_horizontal_flip
{
}
}
data_augmentation_options
{
random_crop_image
{
min_object_covered
:
0
.
0
min_aspect_ratio
:
0
.
75
max_aspect_ratio
:
3
.
0
min_area
:
0
.
75
max_area
:
1
.
0
overlap_thresh
:
0
.
0
}
}
optimizer
{
momentum_optimizer
: {
learning_rate
: {
cosine_decay_learning_rate
{
learning_rate_base
: .
04
total_steps
:
25000
warmup_learning_rate
: .
013333
warmup_steps
:
2000
}
}
momentum_optimizer_value
:
0
.
9
}
use_moving_average
:
false
}
max_number_of_boxes
:
100
unpad_groundtruth_tensors
:
false
}
train_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config
: {
metrics_set
:
"coco_detection_metrics"
use_moving_averages
:
false
}
eval_input_reader
: {
label_map_path
:
"PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle
:
false
num_epochs
:
1
tf_record_input_reader
{
input_path
:
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
research/object_detection/core/box_predictor.py
View file @
31ca3b97
...
...
@@ -134,7 +134,7 @@ class BoxPredictor(object):
pass
class
KerasBoxPredictor
(
tf
.
keras
.
Model
):
class
KerasBoxPredictor
(
tf
.
keras
.
layers
.
Layer
):
"""Keras-based BoxPredictor."""
def
__init__
(
self
,
is_training
,
num_classes
,
freeze_batchnorm
,
...
...
research/object_detection/core/densepose_ops.py
0 → 100644
View file @
31ca3b97
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DensePose operations.
DensePose part ids are represented as tensors of shape
[num_instances, num_points] and coordinates are represented as tensors of shape
[num_instances, num_points, 4] where each point holds (y, x, v, u). The location
of the DensePose sampled point is (y, x) in normalized coordinates. The surface
coordinate (in the part coordinate frame) is (v, u). Note that dim 1 of both
tensors may contain padding, since the number of sampled points per instance
is not fixed. The value `num_points` represents the maximum number of sampled
points for an instance in the example.
"""
import
os
import
scipy.io
import
tensorflow.compat.v1
as
tf
from
object_detection.utils
import
shape_utils
# DensePose body part names, indexed by 0-based part id. 'left_*'/'right_*'
# entries are mirror counterparts, swapped when annotations are horizontally
# flipped; parts without a side (e.g. torso) map to themselves.
PART_NAMES = [
    b'torso_back', b'torso_front', b'right_hand', b'left_hand',
    b'left_foot', b'right_foot', b'right_upper_leg_back',
    b'left_upper_leg_back', b'right_upper_leg_front', b'left_upper_leg_front',
    b'right_lower_leg_back', b'left_lower_leg_back', b'right_lower_leg_front',
    b'left_lower_leg_front', b'left_upper_arm_back', b'right_upper_arm_back',
    b'left_upper_arm_front', b'right_upper_arm_front', b'left_lower_arm_back',
    b'right_lower_arm_back', b'left_lower_arm_front', b'right_lower_arm_front',
    b'right_face', b'left_face',
]
def scale(dp_surface_coords, y_scale, x_scale, scope=None):
  """Scales the (y, x) channels of DensePose coordinates.

  Only the spatial (y, x) channels are multiplied; the surface-coordinate
  (v, u) channels pass through unchanged.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4], with
      coordinates in (y, x, v, u) format.
    y_scale: (float) scalar tensor to multiply y coordinates by.
    x_scale: (float) scalar tensor to multiply x coordinates by.
    scope: name scope.

  Returns:
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4]
  """
  with tf.name_scope(scope, 'DensePoseScale'):
    # Broadcast a [1, 1, 4] factor over all instances and points; v and u get
    # a factor of 1 so they are untouched.
    factors = [[[tf.cast(y_scale, tf.float32),
                 tf.cast(x_scale, tf.float32),
                 1, 1]]]
    return dp_surface_coords * factors
def clip_to_window(dp_surface_coords, window, scope=None):
  """Clips DensePose points to a window.

  Any (y, x) location falling outside the window is moved to the nearest
  window boundary; the (v, u) surface coordinates are left unchanged. See
  also prune_outside_window, which removes such points instead.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip the keypoints.
    scope: name scope.

  Returns:
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4].
  """
  with tf.name_scope(scope, 'DensePoseClipToWindow'):
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    y, x, v, u = tf.split(value=dp_surface_coords, num_or_size_splits=4,
                          axis=2)
    clipped_y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
    clipped_x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
    return tf.concat([clipped_y, clipped_x, v, u], 2)
def prune_outside_window(dp_num_points, dp_part_ids, dp_surface_coords, window,
                         scope=None):
  """Prunes DensePose points that fall outside a given window.

  Points whose (y, x) location is outside the window (and padded points) are
  dropped; each instance's surviving points are repacked at the front and the
  tensors are re-padded to the new maximum point count. See also
  clip_to_window, which clips points to the window boundary instead of
  removing them.

  Note that this operation uses dynamic shapes, and therefore is not currently
  suitable for TPU.

  Args:
    dp_num_points: a tensor of shape [num_instances] that indicates how many
      (non-padded) DensePose points there are per instance.
    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
      part ids. These part_ids are 0-indexed, where the first non-background
      part has index 0.
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window outside of which the op should prune the points.
    scope: name scope.

  Returns:
    new_dp_num_points: a tensor of shape [num_instances] that indicates how
      many (non-padded) DensePose points there are per instance after pruning.
    new_dp_part_ids: a tensor of shape [num_instances, num_points] with
      DensePose part ids after pruning.
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4]
      with DensePose surface coordinates after pruning.
  """
  with tf.name_scope(scope, 'DensePosePruneOutsideWindow'):
    y, x, _, _ = tf.unstack(dp_surface_coords, axis=-1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

    num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
        dp_part_ids)
    # A point is real (not padding) only when its index along dim 1 is below
    # the instance's dp_num_points.
    counts_per_point = tf.tile(dp_num_points[:, tf.newaxis],
                               multiples=[1, num_points])
    point_range = tf.tile(tf.range(num_points)[tf.newaxis, :],
                          multiples=[num_instances, 1])
    is_unpadded = point_range < counts_per_point
    is_inside = tf.logical_and(
        tf.logical_and(y >= win_y_min, y <= win_y_max),
        tf.logical_and(x >= win_x_min, x <= win_x_max))
    keep_mask = tf.logical_and(is_unpadded, is_inside)

    new_dp_num_points = tf.math.reduce_sum(tf.cast(keep_mask, tf.int32),
                                           axis=1)
    max_num_points = tf.math.reduce_max(new_dp_num_points)

    def gather_and_reshuffle(elems):
      # Repack one instance's kept points at the front, then pad (or clip) to
      # the new maximum point count shared by all instances.
      part_ids, surface_coords, mask = elems
      locs = tf.where(mask)[:, 0]
      kept_part_ids = tf.gather(part_ids, locs, axis=0)
      kept_part_ids_padded = shape_utils.pad_or_clip_nd(
          kept_part_ids, output_shape=[max_num_points])
      kept_surface_coords = tf.gather(surface_coords, locs, axis=0)
      kept_surface_coords_padded = shape_utils.pad_or_clip_nd(
          kept_surface_coords, output_shape=[max_num_points, 4])
      return [kept_part_ids_padded, kept_surface_coords_padded]

    new_dp_part_ids, new_dp_surface_coords = (
        shape_utils.static_or_dynamic_map_fn(
            gather_and_reshuffle,
            elems=[dp_part_ids, dp_surface_coords, keep_mask],
            dtype=[tf.int32, tf.float32],
            back_prop=False))
    return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
def change_coordinate_frame(dp_surface_coords, window, scope=None):
  """Changes coordinate frame of the points to be relative to window's frame.

  Given a window of the form [y_min, x_min, y_max, x_max] in normalized
  coordinates, changes DensePose coordinates to be relative to this window.

  An example use case is data augmentation: where we are given groundtruth
  points and would like to randomly crop the image to some window. In this
  case we need to change the coordinate frame of each sampled point to be
  relative to this new window.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window we should change the coordinate frame to.
    scope: name scope.

  Returns:
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4].
  """
  with tf.name_scope(scope, 'DensePoseChangeCoordinateFrame'):
    # Translate (y, x) so the window's top-left corner becomes the origin,
    # then rescale so the window spans [0, 1] in both dimensions.
    win_height = window[2] - window[0]
    win_width = window[3] - window[1]
    shifted = dp_surface_coords - [window[0], window[1], 0, 0]
    return scale(shifted, 1.0 / win_height, 1.0 / win_width)
def to_normalized_coordinates(dp_surface_coords, height, width,
                              check_range=True, scope=None):
  """Converts absolute DensePose coordinates to normalized in range [0, 1].

  This function raises an assertion failed error at graph execution time when
  the maximum coordinate is smaller than 1.01 (which means that coordinates
  are already normalized). The value 1.01 is to deal with small rounding
  errors.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose absolute surface coordinates in (y, x, v, u) format.
    height: Height of image.
    width: Width of image.
    check_range: If True, checks if the coordinates are already normalized.
    scope: name scope.

  Returns:
    A tensor of shape [num_instances, num_points, 4] with normalized
    coordinates.
  """
  with tf.name_scope(scope, 'DensePoseToNormalizedCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)
    if check_range:
      # If the largest (y, x) value is <= 1.01, the input already looks
      # normalized; fail loudly rather than silently double-normalizing.
      max_coord = tf.reduce_max(dp_surface_coords[:, :, :2])
      range_assert = tf.Assert(tf.greater(max_coord, 1.01),
                               ['max value is lower than 1.01: ', max_coord])
      with tf.control_dependencies([range_assert]):
        width = tf.identity(width)
    return scale(dp_surface_coords, 1.0 / height, 1.0 / width)
def to_absolute_coordinates(dp_surface_coords, height, width,
                            check_range=True, scope=None):
  """Converts normalized DensePose coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum
  coordinate value is larger than 1.01 (in which case coordinates are already
  absolute).

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose normalized surface coordinates in (y, x, v, u) format.
    height: Height of image.
    width: Width of image.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    A tensor of shape [num_instances, num_points, 4] with absolute
    coordinates.
  """
  with tf.name_scope(scope, 'DensePoseToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)
    if check_range:
      # A (y, x) value above 1.01 means the input already looks absolute;
      # fail loudly rather than silently scaling twice.
      max_coord = tf.reduce_max(dp_surface_coords[:, :, :2])
      range_assert = tf.Assert(
          tf.greater_equal(1.01, max_coord),
          ['maximum coordinate value is larger than 1.01: ', max_coord])
      with tf.control_dependencies([range_assert]):
        width = tf.identity(width)
    return scale(dp_surface_coords, height, width)
class DensePoseHorizontalFlip(object):
  """Class responsible for horizontal flipping of parts and surface coords."""

  def __init__(self):
    """Constructor.

    Loads the DensePose UV symmetry transforms file (shipped under
    dataset_tools/densepose) and builds lookup tables describing how part ids
    and (v, u) surface coordinates change under a horizontal flip.
    """
    path = os.path.dirname(os.path.abspath(__file__))
    uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
        os.path.join(path, '..', 'dataset_tools', 'densepose',
                     'UV_symmetry_transforms.mat'))
    tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
        uv_symmetry_transforms_path))
    with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
      data = scipy.io.loadmat(f)

    # Create lookup maps which indicate how a VU coordinate changes after a
    # horizontal flip.
    uv_symmetry_map = {}
    for key in ('U_transforms', 'V_transforms'):
      uv_symmetry_map_per_part = []
      for i in range(data[key].shape[1]):
        # The following tensor has shape [256, 256].
        map_per_part = tf.constant(data[key][0, i], dtype=tf.float32)
        uv_symmetry_map_per_part.append(map_per_part)
      uv_symmetry_map[key] = tf.reshape(
          tf.stack(uv_symmetry_map_per_part, axis=0), [-1])
    # The following dictionary contains flattened lookup maps for the U and V
    # coordinates separately. The shape of each is [24 * 256 * 256].
    self.uv_symmetries = uv_symmetry_map

    # Create a list that maps each part index to its horizontally flipped
    # part index (0-indexed): 'left_*' parts swap with their 'right_*'
    # counterparts, and side-less parts map to themselves.
    part_symmetries = []
    for i, part_name in enumerate(PART_NAMES):
      if b'left' in part_name:
        part_symmetries.append(
            PART_NAMES.index(part_name.replace(b'left', b'right')))
      elif b'right' in part_name:
        part_symmetries.append(
            PART_NAMES.index(part_name.replace(b'right', b'left')))
      else:
        part_symmetries.append(i)
    self.part_symmetries = part_symmetries

  def flip_parts_and_coords(self, part_ids, vu):
    """Flips part ids and coordinates.

    Args:
      part_ids: a [num_instances, num_points] int32 tensor with pre-flipped
        part ids. These part_ids are 0-indexed, where the first non-background
        part has index 0.
      vu: a [num_instances, num_points, 2] float32 tensor with pre-flipped vu
        normalized coordinates.

    Returns:
      new_part_ids: a [num_instances, num_points] int32 tensor with
        post-flipped part ids. These part_ids are 0-indexed, where the first
        non-background part has index 0.
      new_vu: a [num_instances, num_points, 2] float32 tensor with
        post-flipped vu coordinates.
    """
    num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
        part_ids)
    part_ids_flattened = tf.reshape(part_ids, [-1])
    new_part_ids_flattened = tf.gather(self.part_symmetries,
                                       part_ids_flattened)
    new_part_ids = tf.reshape(new_part_ids_flattened,
                              [num_instances, num_points])

    # Convert VU floating point coordinates to indices in the [256, 256] grid.
    # Bug fix: clamp the integer indices to at most 255. Previously vu was
    # clipped to [0, 1] and then cast(vu * 256) produced index 256 at
    # vu == 1.0, which indexed into the next part's lookup row (or past the
    # end of the flattened [24 * 256 * 256] table for the last part).
    vu = tf.math.minimum(tf.math.maximum(vu, 0.0), 1.0)
    vu_locs = tf.math.minimum(tf.cast(vu * 256., dtype=tf.int32), 255)
    vu_locs_flattened = tf.reshape(vu_locs, [-1, 2])
    v_locs_flattened, u_locs_flattened = tf.unstack(vu_locs_flattened, axis=1)

    # Convert vu_locs into lookup indices (in flattened part symmetries map).
    symmetry_lookup_inds = (
        part_ids_flattened * 65536 + 256 * v_locs_flattened + u_locs_flattened)

    # New VU coordinates.
    v_new = tf.gather(self.uv_symmetries['V_transforms'],
                      symmetry_lookup_inds)
    u_new = tf.gather(self.uv_symmetries['U_transforms'],
                      symmetry_lookup_inds)
    new_vu_flattened = tf.stack([v_new, u_new], axis=1)
    new_vu = tf.reshape(new_vu_flattened, [num_instances, num_points, 2])

    return new_part_ids, new_vu
def flip_horizontal(dp_part_ids, dp_surface_coords, scope=None):
  """Flips the DensePose points horizontally.

  This operation flips dense pose annotations horizontally. Note that part ids
  and surface coordinates may or may not change as a result of the flip.

  Args:
    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
      part ids. These part_ids are 0-indexed, where the first non-background
      part has index 0.
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) normalized format.
    scope: name scope.

  Returns:
    new_dp_part_ids: a tensor of shape [num_instances, num_points] with
      DensePose part ids after flipping.
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4]
      with DensePose surface coordinates after flipping.
  """
  with tf.name_scope(scope, 'DensePoseFlipHorizontal'):
    y, x, vu = tf.split(dp_surface_coords, num_or_size_splits=[1, 1, 2],
                        axis=2)
    # Mirror x around the vertical center line of the normalized image.
    flipped_x = 1.0 - x
    # Swap left/right part ids and remap surface coordinates via the UV
    # symmetry lookup tables.
    flipper = DensePoseHorizontalFlip()
    new_dp_part_ids, flipped_vu = flipper.flip_parts_and_coords(
        dp_part_ids, vu)
    new_dp_surface_coords = tf.concat([y, flipped_x, flipped_vu], axis=2)
    return new_dp_part_ids, new_dp_surface_coords
Prev
1
…
7
8
9
10
11
12
13
14
15
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment