Commit 31ca3b97 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

resolve merge conflicts

parents 3e9d886d 7fcd7cba
# SSD with EfficientNet-b0 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d0).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b0 checkpoint.
#
# Train on TPU-8
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 512
max_dimension: 512
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 64
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 3
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b0_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 3
num_filters: 64
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 512
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b1 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d1).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b1 checkpoint.
#
# Train on TPU-8
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 640
max_dimension: 640
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 88
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 3
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b1_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 4
num_filters: 88
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 640
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b2 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d2).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b2 checkpoint.
#
# Train on TPU-8
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 768
max_dimension: 768
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 112
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 3
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b2_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 5
num_filters: 112
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 768
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b3 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d3).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b3 checkpoint.
#
# Train on TPU-32
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 896
max_dimension: 896
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 160
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 4
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b3_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 6
num_filters: 160
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 896
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b4 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d4).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b4 checkpoint.
#
# Train on TPU-32
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 1024
max_dimension: 1024
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 224
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 4
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b4_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 7
num_filters: 224
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 1024
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b5 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d5).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b5 checkpoint.
#
# Train on TPU-32
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 1280
max_dimension: 1280
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 288
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 4
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b5_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 7
num_filters: 288
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 1280
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b6 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d6).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b6 checkpoint.
#
# Train on TPU-32
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
add_background_class: false
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 3
}
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 1408
max_dimension: 1408
pad_to_max_dimension: true
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 384
class_prediction_bias_init: -4.6
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true
decay: 0.99
epsilon: 0.001
}
}
num_layers_before_predictor: 5
kernel_size: 3
use_depthwise: true
}
}
feature_extractor {
type: 'ssd_efficientnet-b6_bifpn_keras'
bifpn {
min_level: 3
max_level: 7
num_iterations: 8
num_filters: 384
# Use unweighted sum for stability.
combine_method: 'sum'
}
conv_hyperparams {
force_use_bias: true
activation: SWISH
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.99,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 1.5
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
fine_tune_checkpoint_version: V2
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 300000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_scale_crop_and_pad_to_square {
output_size: 1408
scale_min: 0.1
scale_max: 2.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 8e-2
total_steps: 300000
warmup_learning_rate: .001
warmup_steps: 2500
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with EfficientNet-b6 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d7).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b6 checkpoint.
#
# Train on TPU-32
# Detection model definition for EfficientDet-d7: an SSD meta-architecture
# using the 'ssd_efficientnet-b6_bifpn_keras' feature extractor.
model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    add_background_class: false

    # Box encoding scales.
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }

    # Anchor/groundtruth matching; matched and unmatched thresholds are equal,
    # so there is no band of similarity values between the two.
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true

    # Anchor levels use the same min/max level values (3 and 7) as the bifpn
    # section of the feature extractor below.
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 3
      }
    }

    # min and max dimension are equal and padding is enabled.
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 1536
        max_dimension: 1536
        pad_to_max_dimension: true
      }
    }

    box_predictor {
      weight_shared_convolutional_box_predictor {
        # Same value as bifpn.num_filters in the feature extractor.
        depth: 384
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          force_use_bias: true
          activation: SWISH
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true
            decay: 0.99
            epsilon: 0.001
          }
        }
        num_layers_before_predictor: 5
        kernel_size: 3
        use_depthwise: true
      }
    }

    feature_extractor {
      type: 'ssd_efficientnet-b6_bifpn_keras'
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 8
        num_filters: 384
        # Use unweighted sum for stability.
        combine_method: 'sum'
      }
      conv_hyperparams {
        force_use_bias: true
        activation: SWISH
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true
          decay: 0.99
          epsilon: 0.001
        }
      }
    }

    # Sigmoid focal classification loss plus smooth-L1 localization loss,
    # equally weighted.
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 1.5
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true

    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}
# Training schedule, data augmentation and optimizer settings.
train_config: {
  # The PATH_TO_BE_CONFIGURED prefix is a placeholder.
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-0"
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint_type: "classification"
  batch_size: 128
  sync_replicas: true
  startup_delay_steps: 0
  # NOTE(review): the header comment of this config says "Train on TPU-32"
  # while this value is 8 -- confirm that is intended.
  replicas_to_aggregate: 8
  use_bfloat16: true
  num_steps: 300000

  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      # Same value as the model's image_resizer min/max dimension.
      output_size: 1536
      scale_min: 0.1
      scale_max: 2.0
    }
  }

  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: 8e-2
          # Same value as num_steps above.
          total_steps: 300000
          warmup_learning_rate: .001
          warmup_steps: 2500
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }

  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}
# Training input: label map plus TFRecord shards matched by a glob pattern
# (train2017-?????-of-00256). The PATH_TO_BE_CONFIGURED prefix is a
# placeholder.
train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}
# Evaluation settings: COCO detection metrics, one image per batch,
# moving averages disabled.
eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 1
}
# Evaluation input: label map plus TFRecord shards matched by a glob pattern
# (val2017-?????-of-00032), read for one epoch without shuffling.
# Both paths use the same PATH_TO_BE_CONFIGURED placeholder prefix so that a
# single search-and-replace of the placeholder covers every path.
eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}
# SSD with Mobilenet v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 29.1 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_mobilenet_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v1.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Mobilenet v2
# Trained on COCO17, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 22.2 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 1
box_code_size: 4
apply_sigmoid_to_scores: false
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_v2_keras'
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.97,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.75,
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
delta: 1.0
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 512
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .8
total_steps: 50000
warmup_learning_rate: 0.13333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
# predictor and focal loss (a mobile version of Retinanet), 320x320 input.
# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 22.2 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 128
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
share_prediction_tower: true
use_depthwise: true
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_mobilenet_v2_fpn_keras'
use_depthwise: true
fpn {
min_level: 3
max_level: 7
additional_layer_depth: 128
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .08
total_steps: 50000
warmup_learning_rate: .026666
warmup_steps: 1000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
# predictor and focal loss (a mobile version of Retinanet), 640x640 input.
# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 28.2 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 128
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
share_prediction_tower: true
use_depthwise: true
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_mobilenet_v2_fpn_keras'
use_depthwise: true
fpn {
min_level: 3
max_level: 7
additional_layer_depth: 128
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 128
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .08
total_steps: 50000
warmup_learning_rate: .026666
warmup_steps: 1000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. Retinanet), 1024x1024 input.
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 39.5 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet101_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. Retinanet), 640x640 input.
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 35.4 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet101_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Resnet 152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. Retinanet), 1024x1024 input.
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 39.6 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet152_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Resnet 152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. Retinanet), 640x640 input.
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 35.6 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet152_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. Retinanet), 1024x1024 input.
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 38.3 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet50_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. Retinanet), 640x640 input.
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 34.3 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet50_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
......@@ -134,7 +134,7 @@ class BoxPredictor(object):
pass
class KerasBoxPredictor(tf.keras.Model):
class KerasBoxPredictor(tf.keras.layers.Layer):
"""Keras-based BoxPredictor."""
def __init__(self, is_training, num_classes, freeze_batchnorm,
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DensePose operations.
DensePose part ids are represented as tensors of shape
[num_instances, num_points] and coordinates are represented as tensors of shape
[num_instances, num_points, 4] where each point holds (y, x, v, u). The location
of the DensePose sampled point is (y, x) in normalized coordinates. The surface
coordinate (in the part coordinate frame) is (v, u). Note that dim 1 of both
tensors may contain padding, since the number of sampled points per instance
is not fixed. The value `num_points` represents the maximum number of sampled
points for an instance in the example.
"""
import os
import scipy.io
import tensorflow.compat.v1 as tf
from object_detection.utils import shape_utils
# Names of the 24 DensePose body parts. The position of a name in this list is
# used as its 0-indexed part id: DensePoseHorizontalFlip swaps b'left' and
# b'right' in a name and looks the result up here to find the part id that a
# part maps to after a horizontal flip.
PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
def scale(dp_surface_coords, y_scale, x_scale, scope=None):
  """Rescales the (y, x) location of DensePose points.

  Only the image location of each point is multiplied by the given factors;
  the surface coordinates (v, u) are multiplied by 1 and so are unchanged.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4], with
      coordinates in (y, x, v, u) format.
    y_scale: (float) scalar tensor applied to the y coordinate.
    x_scale: (float) scalar tensor applied to the x coordinate.
    scope: name scope.

  Returns:
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4]
  """
  with tf.name_scope(scope, 'DensePoseScale'):
    y_factor = tf.cast(y_scale, tf.float32)
    x_factor = tf.cast(x_scale, tf.float32)
    # One multiplier per channel, broadcast over instances and points.
    channel_multipliers = [[[y_factor, x_factor, 1, 1]]]
    return dp_surface_coords * channel_multipliers
def clip_to_window(dp_surface_coords, window, scope=None):
  """Clamps the (y, x) location of DensePose points into a window.

  Points lying outside the window are moved onto its border; the surface
  coordinates (v, u) are passed through unchanged.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip the points.
    scope: name scope.

  Returns:
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4].
  """
  with tf.name_scope(scope, 'DensePoseClipToWindow'):
    ys, xs, vs, us = tf.split(
        value=dp_surface_coords, num_or_size_splits=4, axis=2)
    y_lower, x_lower, y_upper, x_upper = tf.unstack(window)
    # Apply the upper bound first, then the lower bound, per coordinate.
    clipped_ys = tf.maximum(tf.minimum(ys, y_upper), y_lower)
    clipped_xs = tf.maximum(tf.minimum(xs, x_upper), x_lower)
    return tf.concat([clipped_ys, clipped_xs, vs, us], 2)
def prune_outside_window(dp_num_points, dp_part_ids, dp_surface_coords, window,
                         scope=None):
  """Drops DensePose points that lie outside a given window.

  For every instance, the points that are both non-padded and inside the window
  (borders included) are gathered to the front of the points dimension. The
  points dimension of the outputs is then padded or clipped, via
  shape_utils.pad_or_clip_nd, to the largest number of surviving points found
  across all instances.

  See also clip_to_window, which moves out-of-window points onto the window
  border instead of removing them.

  Note that this operation uses dynamic shapes, and therefore is not currently
  suitable for TPU.

  Args:
    dp_num_points: a tensor of shape [num_instances] that indicates how many
      (non-padded) DensePose points there are per instance.
    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
      part ids. These part_ids are 0-indexed, where the first non-background
      part has index 0.
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window outside of which the op should prune the points.
    scope: name scope.

  Returns:
    new_dp_num_points: a tensor of shape [num_instances] that indicates how many
      (non-padded) DensePose points there are per instance after pruning.
    new_dp_part_ids: a tensor of shape [num_instances, max_num_points] with
      DensePose part ids, where max_num_points is the maximum of
      new_dp_num_points. These part_ids are 0-indexed, where the first
      non-background part has index 0.
    new_dp_surface_coords: a tensor of shape
      [num_instances, max_num_points, 4] with DensePose surface coordinates
      after pruning.
  """
  with tf.name_scope(scope, 'DensePosePruneOutsideWindow'):
    ys, xs, _, _ = tf.unstack(dp_surface_coords, axis=-1)
    y_lower, x_lower, y_upper, x_upper = tf.unstack(window)

    num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
        dp_part_ids)

    # Mask of slots that hold real (non-padded) points: slot index < count.
    counts_per_slot = tf.tile(
        dp_num_points[:, tf.newaxis], multiples=[1, num_points])
    slot_indices = tf.tile(
        tf.range(num_points)[tf.newaxis, :], multiples=[num_instances, 1])
    is_real_point = slot_indices < counts_per_slot

    # Mask of points whose (y, x) location lies within the window.
    is_inside_window = tf.logical_and(
        tf.logical_and(ys >= y_lower, ys <= y_upper),
        tf.logical_and(xs >= x_lower, xs <= x_upper))

    keep_mask = tf.logical_and(is_real_point, is_inside_window)
    new_dp_num_points = tf.math.reduce_sum(
        tf.cast(keep_mask, tf.int32), axis=1)
    max_num_points = tf.math.reduce_max(new_dp_num_points)

    def _compact_instance(instance_elems):
      """Packs one instance's kept points to the front and pads the tail."""
      part_ids, surface_coords, keep = instance_elems
      kept_locs = tf.where(keep)[:, 0]
      kept_part_ids = tf.gather(part_ids, kept_locs, axis=0)
      padded_part_ids = shape_utils.pad_or_clip_nd(
          kept_part_ids, output_shape=[max_num_points])
      kept_surface_coords = tf.gather(surface_coords, kept_locs, axis=0)
      padded_surface_coords = shape_utils.pad_or_clip_nd(
          kept_surface_coords, output_shape=[max_num_points, 4])
      return [padded_part_ids, padded_surface_coords]

    new_dp_part_ids, new_dp_surface_coords = (
        shape_utils.static_or_dynamic_map_fn(
            _compact_instance,
            elems=[dp_part_ids, dp_surface_coords, keep_mask],
            dtype=[tf.int32, tf.float32],
            back_prop=False))
    return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
def change_coordinate_frame(dp_surface_coords, window, scope=None):
  """Re-expresses DensePose point locations relative to a window.

  The window's top-left corner (y_min, x_min) is subtracted from each point's
  (y, x) location, and the result is divided by the window's height and width.
  The surface coordinates (v, u) are unchanged.

  An example use case is data augmentation: when an image with groundtruth
  points is randomly cropped to some window, each sampled point has to be
  expressed in the frame of that crop.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) format.
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window we should change the coordinate frame to.
    scope: name scope.

  Returns:
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4].
  """
  with tf.name_scope(scope, 'DensePoseChangeCoordinateFrame'):
    inverse_height = 1.0 / (window[2] - window[0])
    inverse_width = 1.0 / (window[3] - window[1])
    # Shift so the window's top-left corner becomes the origin; v and u get a
    # zero offset.
    shifted_coords = dp_surface_coords - [window[0], window[1], 0, 0]
    return scale(shifted_coords, inverse_height, inverse_width)
def to_normalized_coordinates(dp_surface_coords, height, width,
                              check_range=True, scope=None):
  """Divides absolute (y, x) point locations by the image size.

  When `check_range` is set, an assertion is attached that fails at graph
  execution time unless the largest (y, x) value is greater than 1.01; a
  smaller maximum means the coordinates are already normalized. The value 1.01
  leaves room for small rounding errors.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose absolute surface coordinates in (y, x, v, u) format.
    height: Height of image.
    width: Width of image.
    check_range: If True, checks if the coordinates are already normalized.
    scope: name scope.

  Returns:
    A tensor of shape [num_instances, num_points, 4] with normalized
    coordinates.
  """
  with tf.name_scope(scope, 'DensePoseToNormalizedCoordinates'):
    image_height = tf.cast(height, tf.float32)
    image_width = tf.cast(width, tf.float32)

    if check_range:
      largest_yx = tf.reduce_max(dp_surface_coords[:, :, :2])
      range_assert = tf.Assert(
          tf.greater(largest_yx, 1.01),
          ['max value is lower than 1.01: ', largest_yx])
      # Route the width through the assert so the check runs with the scaling.
      with tf.control_dependencies([range_assert]):
        image_width = tf.identity(image_width)

    return scale(dp_surface_coords, 1.0 / image_height, 1.0 / image_width)
def to_absolute_coordinates(dp_surface_coords, height, width,
                            check_range=True, scope=None):
  """Multiplies normalized (y, x) point locations by the image size.

  When `check_range` is set, an assertion is attached that fails when the
  largest (y, x) value exceeds 1.01, in which case the coordinates are already
  absolute.

  Args:
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose normalized surface coordinates in (y, x, v, u) format.
    height: Height of image.
    width: Width of image.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    A tensor of shape [num_instances, num_points, 4] with absolute coordinates.
  """
  with tf.name_scope(scope, 'DensePoseToAbsoluteCoordinates'):
    image_height = tf.cast(height, tf.float32)
    image_width = tf.cast(width, tf.float32)

    if check_range:
      largest_yx = tf.reduce_max(dp_surface_coords[:, :, :2])
      range_assert = tf.Assert(
          tf.greater_equal(1.01, largest_yx),
          ['maximum coordinate value is larger than 1.01: ', largest_yx])
      # Route the width through the assert so the check runs with the scaling.
      with tf.control_dependencies([range_assert]):
        image_width = tf.identity(image_width)

    return scale(dp_surface_coords, image_height, image_width)
class DensePoseHorizontalFlip(object):
  """Class responsible for horizontal flipping of parts and surface coords.

  On construction, the UV symmetry lookup maps are read from
  'UV_symmetry_transforms.mat' and a table mapping each part index to its
  flipped counterpart is derived from PART_NAMES.
  """

  def __init__(self):
    """Loads the UV symmetry maps and builds the part symmetry table."""
    path = os.path.dirname(os.path.abspath(__file__))
    uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
        os.path.join(path, '..', 'dataset_tools', 'densepose',
                     'UV_symmetry_transforms.mat'))
    tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
        uv_symmetry_transforms_path))
    with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
      data = scipy.io.loadmat(f)

    # Create lookup maps which indicate how a VU coordinate changes after a
    # horizontal flip.
    uv_symmetry_map = {}
    for key in ('U_transforms', 'V_transforms'):
      uv_symmetry_map_per_part = []
      for i in range(data[key].shape[1]):
        # The following tensor has shape [256, 256].
        map_per_part = tf.constant(data[key][0, i], dtype=tf.float32)
        uv_symmetry_map_per_part.append(map_per_part)
      uv_symmetry_map[key] = tf.reshape(
          tf.stack(uv_symmetry_map_per_part, axis=0), [-1])
    # The following dictionary contains flattened lookup maps for the U and V
    # coordinates separately. The shape of each is [24 * 256 * 256].
    self.uv_symmetries = uv_symmetry_map

    # Create a list that maps part index to flipped part index (0-indexed).
    # Parts whose name contains neither b'left' nor b'right' map to themselves.
    part_symmetries = []
    for i, part_name in enumerate(PART_NAMES):
      if b'left' in part_name:
        part_symmetries.append(PART_NAMES.index(
            part_name.replace(b'left', b'right')))
      elif b'right' in part_name:
        part_symmetries.append(PART_NAMES.index(
            part_name.replace(b'right', b'left')))
      else:
        part_symmetries.append(i)
    self.part_symmetries = part_symmetries

  def flip_parts_and_coords(self, part_ids, vu):
    """Flips part ids and coordinates.

    Args:
      part_ids: a [num_instances, num_points] int32 tensor with pre-flipped part
        ids. These part_ids are 0-indexed, where the first non-background part
        has index 0.
      vu: a [num_instances, num_points, 2] float32 tensor with pre-flipped vu
        normalized coordinates. Values are clipped to [0, 1] before the lookup.

    Returns:
      new_part_ids: a [num_instances, num_points] int32 tensor with post-flipped
        part ids. These part_ids are 0-indexed, where the first non-background
        part has index 0.
      new_vu: a [num_instances, num_points, 2] float32 tensor with post-flipped
        vu coordinates.
    """
    num_instances, num_points = shape_utils.combined_static_and_dynamic_shape(
        part_ids)
    part_ids_flattened = tf.reshape(part_ids, [-1])
    new_part_ids_flattened = tf.gather(self.part_symmetries, part_ids_flattened)
    new_part_ids = tf.reshape(new_part_ids_flattened,
                              [num_instances, num_points])

    # Convert VU floating point coordinates to cell indices in the [256, 256]
    # grid. After clipping to [0, 1], a coordinate of exactly 1.0 would map to
    # index 256, one past the last cell; clamp to 255 so that the flattened
    # lookup below never spills into the next part's map or runs past the end
    # of the table for the last part.
    vu = tf.math.minimum(tf.math.maximum(vu, 0.0), 1.0)
    vu_locs = tf.math.minimum(tf.cast(vu * 256., dtype=tf.int32), 255)
    vu_locs_flattened = tf.reshape(vu_locs, [-1, 2])
    v_locs_flattened, u_locs_flattened = tf.unstack(vu_locs_flattened, axis=1)

    # Convert vu_locs into lookup indices (in flattened part symmetries map).
    # Each part owns a contiguous run of 256 * 256 = 65536 entries.
    symmetry_lookup_inds = (
        part_ids_flattened * 65536 + 256 * v_locs_flattened + u_locs_flattened)

    # New VU coordinates.
    v_new = tf.gather(self.uv_symmetries['V_transforms'], symmetry_lookup_inds)
    u_new = tf.gather(self.uv_symmetries['U_transforms'], symmetry_lookup_inds)
    new_vu_flattened = tf.stack([v_new, u_new], axis=1)
    new_vu = tf.reshape(new_vu_flattened, [num_instances, num_points, 2])

    return new_part_ids, new_vu
def flip_horizontal(dp_part_ids, dp_surface_coords, scope=None):
  """Mirrors DensePose annotations left-to-right.

  The normalized x location of every point is replaced by 1.0 - x, and the part
  ids and (v, u) surface coordinates are remapped through
  DensePoseHorizontalFlip. Note that part ids and surface coordinates may or
  may not change as a result of the flip.

  Args:
    dp_part_ids: a tensor of shape [num_instances, num_points] with DensePose
      part ids. These part_ids are 0-indexed, where the first non-background
      part has index 0.
    dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates in (y, x, v, u) normalized format.
    scope: name scope.

  Returns:
    new_dp_part_ids: a tensor of shape [num_instances, num_points] with
      DensePose part ids after flipping.
    new_dp_surface_coords: a tensor of shape [num_instances, num_points, 4] with
      DensePose surface coordinates after flipping.
  """
  with tf.name_scope(scope, 'DensePoseFlipHorizontal'):
    # Mirror the x location; y is carried over untouched.
    ys, xs, vus = tf.split(
        dp_surface_coords, num_or_size_splits=[1, 1, 2], axis=2)
    mirrored_xs = 1.0 - xs

    # Remap part ids and surface coordinates to their flipped counterparts.
    flipper = DensePoseHorizontalFlip()
    flipped_part_ids, flipped_vus = flipper.flip_parts_and_coords(
        dp_part_ids, vus)

    flipped_surface_coords = tf.concat([ys, mirrored_xs, flipped_vus], axis=2)
    return flipped_part_ids, flipped_surface_coords
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment