ModelZoo / ResNet50_tensorflow · Commits

Commit 5a2cf36f, authored Jul 23, 2020 by Kaushik Shivakumar

    Merge remote-tracking branch 'upstream/master' into newavarecords

Parents: 258ddfc3, a829e648
Changes: 330
Showing 20 changed files with 1841 additions and 140 deletions (+1841, -140)
research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config    +197  -0
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config  +197  -0
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config    +197  -0
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config   +197  -0
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config     +197  -0
research/object_detection/core/box_predictor.py                                           +1    -1
research/object_detection/core/densepose_ops.py                                           +8    -7
research/object_detection/core/model.py                                                   +47   -17
research/object_detection/core/preprocessor.py                                            +140  -10
research/object_detection/core/preprocessor_test.py                                       +84   -71
research/object_detection/core/standard_fields.py                                         +10   -0
research/object_detection/core/target_assigner.py                                         +254  -16
research/object_detection/core/target_assigner_test.py                                    +268  -0
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py           +6    -4
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py  +7    -1
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py      +6  -3
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py  +5  -1
research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py           +5    -3
research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py  +5    -1
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py           +10   -5
research/object_detection/configs/tf2/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config (new file, 0 → 100644)

# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 35.4 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 640
        width: 640
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.0004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_resnet101_v1_fpn_keras'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.0004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet101.ckpt-1"
  fine_tune_checkpoint_type: "classification"
  batch_size: 64
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  use_bfloat16: true
  num_steps: 25000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .04
          total_steps: 25000
          warmup_learning_rate: .013333
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}
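
The four configs that follow differ from this one only in backbone depth, input resolution, checkpoint path, schedule length, and reported mAP. As a quick sanity check, a pipeline file like this can be parsed with the Object Detection API's config utilities; a minimal sketch, assuming a local copy of the file under the name shown:

import tensorflow as tf
from object_detection.utils import config_util

# Parse the pipeline config into its component protos. The local filename
# is an assumption for illustration.
configs = config_util.get_configs_from_pipeline_file(
    'ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.config')

print(configs['model'].ssd.num_classes)             # 90
print(configs['train_config'].batch_size)           # 64
print(configs['train_config'].fine_tune_checkpoint)
# PATH_TO_BE_CONFIGURED/resnet101.ckpt-1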
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.config (new file, 0 → 100644)

# SSD with Resnet 152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 39.6 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 1024
        width: 1024
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.0004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_resnet152_v1_fpn_keras'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.0004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
  fine_tune_checkpoint_type: "classification"
  batch_size: 64
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  use_bfloat16: true
  num_steps: 100000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .04
          total_steps: 100000
          warmup_learning_rate: .013333
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}
research/object_detection/configs/tf2/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config (new file, 0 → 100644)

# SSD with Resnet 152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 35.6 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 640
        width: 640
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.0004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_resnet152_v1_fpn_keras'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.0004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
  fine_tune_checkpoint_type: "classification"
  batch_size: 64
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  use_bfloat16: true
  num_steps: 25000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .04
          total_steps: 25000
          warmup_learning_rate: .013333
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.config (new file, 0 → 100644)

# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 38.3 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 1024
        width: 1024
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.0004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_resnet50_v1_fpn_keras'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.0004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
  fine_tune_checkpoint_type: "classification"
  batch_size: 64
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  use_bfloat16: true
  num_steps: 100000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .04
          total_steps: 100000
          warmup_learning_rate: .013333
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}
research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config (new file, 0 → 100644)

# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 34.3 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 640
        width: 640
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.0004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_resnet50_v1_fpn_keras'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.0004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
  fine_tune_checkpoint_type: "classification"
  batch_size: 64
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  use_bfloat16: true
  num_steps: 25000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .04
          total_steps: 25000
          warmup_learning_rate: .013333
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}
research/object_detection/core/box_predictor.py

@@ -134,7 +134,7 @@ class BoxPredictor(object):
    pass


-class KerasBoxPredictor(tf.keras.Model):
+class KerasBoxPredictor(tf.keras.layers.Layer):
  """Keras-based BoxPredictor."""

  def __init__(self, is_training, num_classes, freeze_batchnorm,
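
The one-line change swaps KerasBoxPredictor's base class from tf.keras.Model to tf.keras.layers.Layer, so the predictor behaves as a composable layer inside larger Keras models rather than as a standalone model. A minimal sketch of the subclassing pattern, with a hypothetical TinyPredictor standing in for the real class:

import tensorflow as tf

class TinyPredictor(tf.keras.layers.Layer):
  """Hypothetical stand-in for a Layer-based box predictor head."""

  def __init__(self, depth, **kwargs):
    super(TinyPredictor, self).__init__(**kwargs)
    self._conv = tf.keras.layers.Conv2D(depth, 3, padding='same')

  def call(self, features):
    # Map backbone features to per-anchor predictions.
    return self._conv(features)

head = TinyPredictor(depth=256)
outputs = head(tf.zeros([1, 32, 32, 64]))  # -> shape [1, 32, 32, 256]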
research/object_detection/core/densepose_ops.py

@@ -42,9 +42,6 @@ PART_NAMES = [
    b'left_face',
 ]

-_SRC_PATH = ('google3/third_party/tensorflow_models/object_detection/'
-             'dataset_tools/densepose')

 def scale(dp_surface_coords, y_scale, x_scale, scope=None):
   """Scales DensePose coordinates in y and x dimensions.

@@ -266,10 +263,14 @@ class DensePoseHorizontalFlip(object):

  def __init__(self):
    """Constructor."""
-    uv_symmetry_transforms_path = os.path.join(
-        tf.resource_loader.get_data_files_path(), '..', 'dataset_tools',
-        'densepose', 'UV_symmetry_transforms.mat')
-    data = scipy.io.loadmat(uv_symmetry_transforms_path)
+    path = os.path.dirname(os.path.abspath(__file__))
+    uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
+        os.path.join(path, '..', 'dataset_tools', 'densepose',
+                     'UV_symmetry_transforms.mat'))
+    tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
+        uv_symmetry_transforms_path))
+    with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
+      data = scipy.io.loadmat(f)

    # Create lookup maps which indicate how a VU coordinate changes after a
    # horizontal flip.
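
The rewritten constructor reads the .mat file through tf.io.gfile, which also works on non-local filesystems; this relies on scipy.io.loadmat accepting any file-like object rather than only a path. A self-contained sketch of that property using an in-memory buffer:

import io
import numpy as np
import scipy.io

# loadmat accepts a file handle, which is why a tf.io.gfile.GFile opened
# in 'rb' mode can be passed directly in the diff above.
buf = io.BytesIO()
scipy.io.savemat(buf, {'transform': np.eye(2)})
buf.seek(0)
data = scipy.io.loadmat(buf)
print(data['transform'])  # [[1. 0.], [0. 1.]]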
research/object_detection/core/model.py

@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
        fields.InputDataFields.is_annotated.

    Returns:

@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
        fields.InputDataFields.is_annotated.

    Returns:

@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
      detection_classes: [batch, max_detections]
        (If a model is producing class-agnostic detections, this field may be
        missing)
-      instance_masks: [batch, max_detections, image_height, image_width]
-        (optional)
-      keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+      detection_masks: [batch, max_detections, mask_height, mask_width]
+        (optional)
+      detection_keypoints: [batch, max_detections, num_keypoints, 2]
+        (optional)
+      detection_keypoint_scores: [batch, max_detections, num_keypoints]
+        (optional)
+      detection_surface_coords: [batch, max_detections, mask_height,
+        mask_width, 2] (optional)
      num_detections: [batch]

    In addition to the above fields this stage also outputs the following

@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    """
    pass

-  def provide_groundtruth(self,
-                          groundtruth_boxes_list,
-                          groundtruth_classes_list,
-                          groundtruth_masks_list=None,
-                          groundtruth_keypoints_list=None,
-                          groundtruth_keypoint_visibilities_list=None,
-                          groundtruth_weights_list=None,
-                          groundtruth_confidences_list=None,
-                          groundtruth_is_crowd_list=None,
-                          groundtruth_group_of_list=None,
-                          groundtruth_area_list=None,
-                          is_annotated_list=None,
-                          groundtruth_labeled_classes=None):
+  def provide_groundtruth(self,
+                          groundtruth_boxes_list,
+                          groundtruth_classes_list,
+                          groundtruth_masks_list=None,
+                          groundtruth_keypoints_list=None,
+                          groundtruth_keypoint_visibilities_list=None,
+                          groundtruth_dp_num_points_list=None,
+                          groundtruth_dp_part_ids_list=None,
+                          groundtruth_dp_surface_coords_list=None,
+                          groundtruth_weights_list=None,
+                          groundtruth_confidences_list=None,
+                          groundtruth_is_crowd_list=None,
+                          groundtruth_group_of_list=None,
+                          groundtruth_area_list=None,
+                          is_annotated_list=None,
+                          groundtruth_labeled_classes=None):
    """Provide groundtruth tensors.

    Args:

@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
        `groundtruth_keypoint_visibilities_list`).
      groundtruth_keypoint_visibilities_list: a list of 3-D tf.bool tensors
        of shape [num_boxes, num_keypoints] containing keypoint visibilities.
+      groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
+        [num_boxes] containing the number of DensePose sampled points.
+      groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
+        [num_boxes, max_sampled_points] containing the DensePose part ids
+        (0-indexed) for each sampled point. Note that there may be padding.
+      groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, max_sampled_points, 4] containing the DensePose
+        surface coordinates for each sampled point. Note that there may be
+        padding.
      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
        [num_boxes] containing weights for groundtruth boxes.
      groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape

@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
      self._groundtruth_lists[fields.BoxListFields.keypoint_visibilities] = (
          groundtruth_keypoint_visibilities_list)
+    if groundtruth_dp_num_points_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_num_points] = (
+              groundtruth_dp_num_points_list)
+    if groundtruth_dp_part_ids_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_part_ids] = (
+              groundtruth_dp_part_ids_list)
+    if groundtruth_dp_surface_coords_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_surface_coords] = (
+              groundtruth_dp_surface_coords_list)
    if groundtruth_is_crowd_list:
      self._groundtruth_lists[fields.BoxListFields.is_crowd] = (
          groundtruth_is_crowd_list)
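
The new arguments thread DensePose groundtruth into the model's internal _groundtruth_lists dictionary under the new BoxListFields keys. A hedged sketch of the resulting structure, with placeholder tensor values and shapes following the docstring above:

import tensorflow as tf
from object_detection.core import standard_fields as fields

num_boxes, max_sampled_points = 2, 3
groundtruth_lists = {
    # One entry per image in the batch.
    fields.BoxListFields.densepose_num_points: [
        tf.constant([3, 1], dtype=tf.int32)],                    # [num_boxes]
    fields.BoxListFields.densepose_part_ids: [
        tf.zeros([num_boxes, max_sampled_points], tf.int32)],    # padded part ids
    fields.BoxListFields.densepose_surface_coords: [
        tf.zeros([num_boxes, max_sampled_points, 4], tf.float32)],  # (y, x, v, u)
}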
research/object_detection/core/preprocessor.py

@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
  Args:
    image: rank 3 float32 tensor containing 1 image ->
-           [height, width,channels].
+           [height, width, channels].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].

@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
  return return_values


def random_scale_crop_and_pad_to_square(
    image,
    boxes,
    labels,
    label_weights,
    masks=None,
    keypoints=None,
    scale_min=0.1,
    scale_max=2.0,
    output_size=512,
    resize_method=tf.image.ResizeMethod.BILINEAR,
    seed=None):
  """Randomly scale, crop, and then pad an image to fixed square dimensions.

  Randomly scale, crop, and then pad an image to the desired square output
  dimensions. Specifically, this method first samples a random_scale factor
  from a uniform distribution between scale_min and scale_max, and then resizes
  the image such that its maximum dimension is (output_size * random_scale).
  Secondly, a square output_size crop is extracted from the resized image
  (note, this will only occur when random_scale > 1.0). Lastly, the cropped
  region is padded to the desired square output_size, by filling with zeros.
  The augmentation is borrowed from [1]
  [1]: https://arxiv.org/abs/1911.09070

  Args:
    image: rank 3 float32 tensor containing 1 image ->
      [height, width, channels].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
      are in normalized form meaning their coordinates vary between [0, 1].
      Each row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop
      boundary are clipped to the boundary and boxes falling outside the crop
      are ignored.
    labels: rank 1 int32 tensor containing the object classes.
    label_weights: float32 tensor of shape [num_instances] representing the
      weight for each box.
    masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
      width] containing instance masks. The masks are of the same height, width
      as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
      num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
    scale_min: float, the minimum value for the random scale factor.
    scale_max: float, the maximum value for the random scale factor.
    output_size: int, the desired (square) output image size.
    resize_method: tf.image.ResizeMethod, resize method to use when scaling the
      input images.
    seed: random seed.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes.
      Boxes are in normalized form.
    labels: new labels.
    label_weights: rank 1 float32 tensor with shape [num_instances].
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
      containing instance masks.
  """
  img_shape = tf.shape(image)
  input_height, input_width = img_shape[0], img_shape[1]
  random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)

  # Compute the scaled height and width from the random scale.
  max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
  input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
  input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
  scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
  scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)

  # Compute the offsets:
  offset_y = tf.cast(scaled_height - output_size, tf.float32)
  offset_x = tf.cast(scaled_width - output_size, tf.float32)
  offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
  offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
  offset_y = tf.cast(offset_y, tf.int32)
  offset_x = tf.cast(offset_x, tf.int32)

  # Scale, crop, and pad the input image.
  scaled_image = tf.image.resize_images(
      image, [scaled_height, scaled_width], method=resize_method)
  scaled_image = scaled_image[offset_y:offset_y + output_size,
                              offset_x:offset_x + output_size, :]
  output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
                                              output_size, output_size)

  # Update the boxes.
  new_window = tf.cast(
      tf.stack([offset_y, offset_x,
                offset_y + output_size, offset_x + output_size]),
      dtype=tf.float32)
  new_window /= tf.cast(
      tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
      dtype=tf.float32)
  boxlist = box_list.BoxList(boxes)
  boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
  boxlist, indices = box_list_ops.prune_completely_outside_window(
      boxlist, [0.0, 0.0, 1.0, 1.0])
  boxlist = box_list_ops.clip_to_window(
      boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)

  return_values = [output_image, boxlist.get(),
                   tf.gather(labels, indices),
                   tf.gather(label_weights, indices)]

  if masks is not None:
    new_masks = tf.expand_dims(masks, -1)
    new_masks = tf.image.resize_images(
        new_masks, [scaled_height, scaled_width], method=resize_method)
    new_masks = new_masks[:, offset_y:offset_y + output_size,
                          offset_x:offset_x + output_size, :]
    new_masks = tf.image.pad_to_bounding_box(
        new_masks, 0, 0, output_size, output_size)
    new_masks = tf.squeeze(new_masks, [-1])
    return_values.append(tf.gather(new_masks, indices))

  if keypoints is not None:
    keypoints = tf.gather(keypoints, indices)
    keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
    keypoints = keypoint_ops.prune_outside_window(
        keypoints, [0.0, 0.0, 1.0, 1.0])
    return_values.append(keypoints)

  return return_values


def get_default_func_arg_map(include_label_weights=True,
                             include_label_confidences=False,
                             include_multiclass_scores=False,

@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
      random_adjust_saturation: (fields.InputDataFields.image,),
      random_distort_color: (fields.InputDataFields.image,),
      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
-      random_crop_image: (fields.InputDataFields.image,
-                          fields.InputDataFields.groundtruth_boxes,
-                          fields.InputDataFields.groundtruth_classes,
-                          groundtruth_label_weights,
-                          groundtruth_label_confidences,
-                          multiclass_scores,
-                          groundtruth_instance_masks,
-                          groundtruth_keypoints,
-                          groundtruth_keypoint_visibilities,
-                          groundtruth_dp_num_points,
-                          groundtruth_dp_part_ids,
-                          groundtruth_dp_surface_coords),
+      random_crop_image: (fields.InputDataFields.image,
+                          fields.InputDataFields.groundtruth_boxes,
+                          fields.InputDataFields.groundtruth_classes,
+                          groundtruth_label_weights,
+                          groundtruth_label_confidences, multiclass_scores,
+                          groundtruth_instance_masks, groundtruth_keypoints,
+                          groundtruth_keypoint_visibilities,
+                          groundtruth_dp_num_points, groundtruth_dp_part_ids,
+                          groundtruth_dp_surface_coords),
      random_pad_image: (fields.InputDataFields.image,
                         fields.InputDataFields.groundtruth_boxes,
                         groundtruth_instance_masks,

@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_label_weights,
          groundtruth_instance_masks,
          groundtruth_keypoints),
+      random_scale_crop_and_pad_to_square: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_weights,
+          groundtruth_instance_masks,
+          groundtruth_keypoints),
  }

  return prep_func_arg_map
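
A worked example of the geometry in random_scale_crop_and_pad_to_square, using an assumed 512x256 input, output_size=512, and a fixed scale of 0.5 standing in for the random draw:

input_height, input_width = 512, 256
output_size, random_scale = 512, 0.5

max_input_dim = max(input_height, input_width)                 # 512
input_ar_y = input_height / max_input_dim                      # 1.0
input_ar_x = input_width / max_input_dim                       # 0.5
scaled_height = int(random_scale * output_size * input_ar_y)   # 256
scaled_width = int(random_scale * output_size * input_ar_x)    # 128

# Offsets only become nonzero when the scaled image exceeds output_size
# (i.e. random_scale > 1.0); here the image is zero-padded from
# (256, 128) up to (512, 512) with no cropping.
offset_y = max(0, scaled_height - output_size)                 # 0
offset_x = max(0, scaled_width - output_size)                  # 0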
research/object_detection/core/preprocessor_test.py

@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
        test_masks=True, test_keypoints=True)

  @parameterized.parameters(
      {'include_dense_pose': False},
      {'include_dense_pose': True}
  )
  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
    def graph_fn():
      preprocess_options = [(preprocessor.random_horizontal_flip, {})]
      image_height = 3
      image_width = 3
      images = tf.random_uniform([1, image_height, image_width, 3])
      boxes = self.createTestBoxes()
      masks = self.createTestMasks()
      keypoints, keypoint_visibilities = self.createTestKeypoints()
      dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
      keypoint_flip_permutation = self.createKeypointFlipPermutation()
      tensor_dict = {
          fields.InputDataFields.image: images,
          fields.InputDataFields.groundtruth_boxes: boxes,
          fields.InputDataFields.groundtruth_instance_masks: masks,
          fields.InputDataFields.groundtruth_keypoints: keypoints,
          fields.InputDataFields.groundtruth_keypoint_visibilities:
              keypoint_visibilities
      }
      if include_dense_pose:
        tensor_dict.update({
            fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
            fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
            fields.InputDataFields.groundtruth_dp_surface_coords:
                dp_surface_coords
        })
      preprocess_options = [(preprocessor.random_horizontal_flip,
                             {'keypoint_flip_permutation':
                              keypoint_flip_permutation})]
      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
          include_instance_masks=True,
          include_keypoints=True,
          include_keypoint_visibilities=True,
          include_dense_pose=include_dense_pose)
      tensor_dict = preprocessor.preprocess(
          tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
      boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
      masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
      keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
      keypoint_visibilities = tensor_dict[
          fields.InputDataFields.groundtruth_keypoint_visibilities]
      output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
      if include_dense_pose:
        dp_num_points = tensor_dict[
            fields.InputDataFields.groundtruth_dp_num_points]
        dp_part_ids = tensor_dict[
            fields.InputDataFields.groundtruth_dp_part_ids]
        dp_surface_coords = tensor_dict[
            fields.InputDataFields.groundtruth_dp_surface_coords]
        output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
      return output_tensors

    output_tensors = self.execute_cpu(graph_fn, [])
    self.assertIsNotNone(output_tensors[0])  # Boxes.
    self.assertIsNotNone(output_tensors[1])  # Masks.
    self.assertIsNotNone(output_tensors[2])  # Keypoints
    self.assertIsNotNone(output_tensors[3])  # Keypoint Visibilities.
    if include_dense_pose:
      self.assertIsNotNone(output_tensors[4])  # DensePose Num Points.
      self.assertIsNotNone(output_tensors[5])  # DensePose Part IDs.
      self.assertIsNotNone(output_tensors[6])  # DensePose Surface Coords

  def testRandomVerticalFlip(self):

@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
  @parameterized.parameters(
      {'include_dense_pose': False},
      {'include_dense_pose': True}
  )
  def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
    def graph_fn():

@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
      size = max(image.shape)
      self.assertAlmostEqual(scale * 256.0, size)
      self.assertAllClose(image[:, :, 0], masks[0, :, :])

  @parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
                                  ('scale_2_0', 2.0))
  def test_random_scale_crop_and_pad_to_square(self, scale):
    def graph_fn():
      image = np.random.randn(512, 256, 1)
      box_centers = [0.25, 0.5, 0.75]
      box_size = 0.1
      box_corners = []
      box_labels = []
      box_label_weights = []
      keypoints = []
      masks = []
      for center_y in box_centers:
        for center_x in box_centers:
          box_corners.append(
              [center_y - box_size / 2.0, center_x - box_size / 2.0,
               center_y + box_size / 2.0, center_x + box_size / 2.0])
          box_labels.append([1])
          box_label_weights.append([1.])
          keypoints.append(
              [[center_y - box_size / 2.0, center_x - box_size / 2.0],
               [center_y + box_size / 2.0, center_x + box_size / 2.0]])
          masks.append(image[:, :, 0].reshape(512, 256))

      image = tf.constant(image)
      boxes = tf.constant(box_corners)
      labels = tf.constant(box_labels)
      label_weights = tf.constant(box_label_weights)
      keypoints = tf.constant(keypoints)
      masks = tf.constant(np.stack(masks))

      (new_image, new_boxes, _, _, new_masks,
       new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
           image, boxes, labels, label_weights, masks=masks,
           keypoints=keypoints, scale_min=scale, scale_max=scale,
           output_size=512)
      return new_image, new_boxes, new_masks, new_keypoints

    image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])

    # Since random_scale_crop_and_pad_to_square may prune and clip boxes,
    # we only need to find one of the boxes that was not clipped and check
    # that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
    # is equivalent to round(a-b, 7) == 0.
    any_box_has_correct_size = False
    effective_scale_y = int(scale * 512) / 512.0
    effective_scale_x = int(scale * 256) / 512.0
    expected_size_y = 0.1 * effective_scale_y
    expected_size_x = 0.1 * effective_scale_x
    for box in boxes:
      ymin, xmin, ymax, xmax = box
      any_box_has_correct_size |= (
          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
    self.assertTrue(any_box_has_correct_size)

    # Similar to the approach above where we check for at least one box with
    # the expected dimensions, we check for at least one pair of keypoints
    # whose distance matches the expected dimensions.
    any_keypoint_pair_has_correct_dist = False
    for keypoint_pair in keypoints:
      ymin, xmin = keypoint_pair[0]
      ymax, xmax = keypoint_pair[1]
      any_keypoint_pair_has_correct_dist |= (
          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
    self.assertTrue(any_keypoint_pair_has_correct_dist)

    self.assertAlmostEqual(512.0, image.shape[0])
    self.assertAlmostEqual(512.0, image.shape[1])
    self.assertAllClose(image[:, :, 0], masks[0, :, :])
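
The box-size expectations in the test above come from integer truncation of the scaled dimensions. A worked check for the scale = 0.1 case on the 512x256 test image:

scale = 0.1
effective_scale_y = int(scale * 512) / 512.0   # 51 / 512 ~= 0.0996
effective_scale_x = int(scale * 256) / 512.0   # 25 / 512 ~= 0.0488
expected_size_y = 0.1 * effective_scale_y      # ~= 0.00996
expected_size_x = 0.1 * effective_scale_x      # ~= 0.00488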
research/object_detection/core/standard_fields.py

@@ -141,6 +141,8 @@ class DetectionResultFields(object):
      for detection boxes in the image including background class.
    detection_classes: detection-level class labels.
    detection_masks: contains a segmentation mask for each detection box.
+    detection_surface_coords: contains DensePose surface coordinates for each
+      box.
    detection_boundaries: contains an object boundary for each detection box.
    detection_keypoints: contains detection keypoints for each detection box.
    detection_keypoint_scores: contains detection keypoint scores.

@@ -161,6 +163,7 @@ class DetectionResultFields(object):
  detection_features = 'detection_features'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
+  detection_surface_coords = 'detection_surface_coords'
  detection_boundaries = 'detection_boundaries'
  detection_keypoints = 'detection_keypoints'
  detection_keypoint_scores = 'detection_keypoint_scores'

@@ -182,7 +185,11 @@ class BoxListFields(object):
    masks: masks per bounding box.
    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
    keypoint_visibilities: keypoint visibilities per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
+    densepose_num_points: number of DensePose points per bounding box.
+    densepose_part_ids: DensePose part ids per bounding box.
+    densepose_surface_coords: DensePose surface coordinates per bounding box.
    is_crowd: is_crowd annotation per bounding box.
  """
  boxes = 'boxes'

@@ -196,6 +203,9 @@ class BoxListFields(object):
  keypoints = 'keypoints'
  keypoint_visibilities = 'keypoint_visibilities'
  keypoint_heatmaps = 'keypoint_heatmaps'
+  densepose_num_points = 'densepose_num_points'
+  densepose_part_ids = 'densepose_part_ids'
+  densepose_surface_coords = 'densepose_surface_coords'
  is_crowd = 'is_crowd'
  group_of = 'group_of'
research/object_detection/core/target_assigner.py
View file @
5a2cf36f
...
...
@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
from
object_detection.core
import
box_coder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
densepose_ops
from
object_detection.core
import
keypoint_ops
from
object_detection.core
import
matcher
as
mat
from
object_detection.core
import
region_similarity_calculator
as
sim_calc
...
...
@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
function.
Args:
batch_predictions: A tensor of shape [batch_size, height, width, 2] for
single class offsets and [batch_size, height, width, class, 2] for
multiple classes offsets (e.g. keypoint joint offsets) representing the
(height, width) or (y_offset, x_offset) predictions over a batch.
indices: A tensor of shape [num_instances, 3] for single class offset and
[num_instances, 4] for multiple classes offsets representing the indices
in the batch to be penalized in a loss function
batch_predictions: A tensor of shape [batch_size, height, width, channels]
or [batch_size, height, width, class, channels] for class-specific
features (e.g. keypoint joint offsets).
indices: A tensor of shape [num_instances, 3] for single class features or
[num_instances, 4] for multiple classes features.
Returns:
values: A tensor of shape [num_instances,
2
] holding the predicted
values
at the given indices.
values: A tensor of shape [num_instances,
channels
] holding the predicted
values
at the given indices.
"""
return
tf
.
gather_nd
(
batch_predictions
,
indices
)
...
...
@@ -1601,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
return
(
batch_indices
,
batch_offsets
,
batch_weights
)
def
_resize_masks
(
masks
,
height
,
width
,
method
):
# Resize segmentation masks to conform to output dimensions. Use TF2
# image resize because TF1's version is buggy:
# https://yaqs.corp.google.com/eng/q/4970450458378240
masks
=
tf2
.
image
.
resize
(
masks
[:,
:,
:,
tf
.
newaxis
],
size
=
(
height
,
width
),
method
=
method
)
return
masks
[:,
:,
:,
0
]
class
CenterNetMaskTargetAssigner
(
object
):
"""Wrapper to compute targets for segmentation masks."""
...
...
@@ -1642,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
segmentation_targets_list
=
[]
for
gt_masks
,
gt_classes
in
zip
(
gt_masks_list
,
gt_classes_list
):
# Resize segmentation masks to conform to output dimensions. Use TF2
# image resize because TF1's version is buggy:
# https://yaqs.corp.google.com/eng/q/4970450458378240
gt_masks
=
tf2
.
image
.
resize
(
gt_masks
[:,
:,
:,
tf
.
newaxis
],
size
=
(
output_height
,
output_width
),
method
=
mask_resize_method
)
gt_masks
=
_resize_masks
(
gt_masks
,
output_height
,
output_width
,
mask_resize_method
)
gt_masks
=
gt_masks
[:,
:,
:,
tf
.
newaxis
]
gt_classes_reshaped
=
tf
.
reshape
(
gt_classes
,
[
-
1
,
1
,
1
,
num_classes
])
# Shape: [h, w, num_classes].
segmentations_for_image
=
tf
.
reduce_max
(
...
...
@@ -1657,3 +1663,235 @@ class CenterNetMaskTargetAssigner(object):
segmentation_target
=
tf
.
stack
(
segmentation_targets_list
,
axis
=
0
)
return
segmentation_target
class
CenterNetDensePoseTargetAssigner
(
object
):
"""Wrapper to compute targets for DensePose task."""
def
__init__
(
self
,
stride
,
num_parts
=
24
):
self
.
_stride
=
stride
self
.
_num_parts
=
num_parts
def
assign_part_and_coordinate_targets
(
self
,
height
,
width
,
gt_dp_num_points_list
,
gt_dp_part_ids_list
,
gt_dp_surface_coords_list
,
gt_weights_list
=
None
):
"""Returns the DensePose part_id and coordinate targets and their indices.
The returned values are expected to be used with predicted tensors
of size (batch_size, height//self._stride, width//self._stride, 2). The
predicted values at the relevant indices can be retrieved with the
get_batch_predictions_from_indices function.
Args:
height: int, height of input to the model. This is used to determine the
height of the output.
width: int, width of the input to the model. This is used to determine the
width of the output.
gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape [num_boxes]
containing the number of DensePose sampled points per box.
gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding, as
boxes may contain a different number of sampled points.
gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
[num_boxes, max_sampled_points, 4] containing the DensePose surface
coordinates (normalized) for each sampled point. Note that there may be
padding.
gt_weights_list: A list of 1-D tensors with shape [num_boxes]
corresponding to the weight of each groundtruth detection box.
Returns:
batch_indices: an integer tensor of shape [num_total_points, 4] holding
the indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively. The fourth column is the part index.
batch_part_ids: an int tensor of shape [num_total_points, num_parts]
holding 1-hot encodings of parts for each sampled point.
batch_surface_coords: a float tensor of shape [num_total_points, 2]
holding the expected (v, u) coordinates for each sampled point.
batch_weights: a float tensor of shape [num_total_points] indicating the
weight of each prediction.
Note that num_total_points = batch_size * num_boxes * max_sampled_points.
"""
if
gt_weights_list
is
None
:
gt_weights_list
=
[
None
]
*
len
(
gt_dp_num_points_list
)
batch_indices
=
[]
batch_part_ids
=
[]
batch_surface_coords
=
[]
batch_weights
=
[]
for
i
,
(
num_points
,
part_ids
,
surface_coords
,
weights
)
in
enumerate
(
zip
(
gt_dp_num_points_list
,
gt_dp_part_ids_list
,
gt_dp_surface_coords_list
,
gt_weights_list
)):
num_boxes
,
max_sampled_points
=
(
shape_utils
.
combined_static_and_dynamic_shape
(
part_ids
))
part_ids_flattened
=
tf
.
reshape
(
part_ids
,
[
-
1
])
part_ids_one_hot
=
tf
.
one_hot
(
part_ids_flattened
,
depth
=
self
.
_num_parts
)
# Get DensePose coordinates in the output space.
surface_coords_abs
=
densepose_ops
.
to_absolute_coordinates
(
surface_coords
,
height
//
self
.
_stride
,
width
//
self
.
_stride
)
surface_coords_abs
=
tf
.
reshape
(
surface_coords_abs
,
[
-
1
,
4
])
# Each tensor has shape [num_boxes * max_sampled_points].
yabs
,
xabs
,
v
,
u
=
tf
.
unstack
(
surface_coords_abs
,
axis
=-
1
)
# Get the indices (in output space) for the DensePose coordinates. Note
# that if self._stride is larger than 1, this will have the effect of
# reducing spatial resolution of the groundtruth points.
indices_y
=
tf
.
cast
(
yabs
,
tf
.
int32
)
indices_x
=
tf
.
cast
(
xabs
,
tf
.
int32
)
# Assign ones if weights are not provided.
if
weights
is
None
:
weights
=
tf
.
ones
(
num_boxes
,
dtype
=
tf
.
float32
)
# Create per-point weights.
weights_per_point
=
tf
.
reshape
(
tf
.
tile
(
weights
[:,
tf
.
newaxis
],
multiples
=
[
1
,
max_sampled_points
]),
shape
=
[
-
1
])
# Mask out invalid (i.e. padded) DensePose points.
num_points_tiled
=
tf
.
tile
(
num_points
[:,
tf
.
newaxis
],
multiples
=
[
1
,
max_sampled_points
])
range_tiled
=
tf
.
tile
(
tf
.
range
(
max_sampled_points
)[
tf
.
newaxis
,
:],
multiples
=
[
num_boxes
,
1
])
valid_points
=
tf
.
math
.
less
(
range_tiled
,
num_points_tiled
)
valid_points
=
tf
.
cast
(
tf
.
reshape
(
valid_points
,
[
-
1
]),
dtype
=
tf
.
float32
)
weights_per_point
=
weights_per_point
*
valid_points
# Shape of [num_boxes * max_sampled_points] integer tensor filled with
# current batch index.
batch_index
=
i
*
tf
.
ones_like
(
indices_y
,
dtype
=
tf
.
int32
)
batch_indices
.
append
(
tf
.
stack
([
batch_index
,
indices_y
,
indices_x
,
part_ids_flattened
],
axis
=
1
))
batch_part_ids
.
append
(
part_ids_one_hot
)
batch_surface_coords
.
append
(
tf
.
stack
([
v
,
u
],
axis
=
1
))
batch_weights
.
append
(
weights_per_point
)
batch_indices
=
tf
.
concat
(
batch_indices
,
axis
=
0
)
batch_part_ids
=
tf
.
concat
(
batch_part_ids
,
axis
=
0
)
batch_surface_coords
=
tf
.
concat
(
batch_surface_coords
,
axis
=
0
)
batch_weights
=
tf
.
concat
(
batch_weights
,
axis
=
0
)
return
batch_indices
,
batch_part_ids
,
batch_surface_coords
,
batch_weights
def
filter_mask_overlap_min_area
(
masks
):
"""If a pixel belongs to 2 instances, remove it from the larger instance."""
num_instances
=
tf
.
shape
(
masks
)[
0
]
def
_filter_min_area
():
"""Helper function to filter non empty masks."""
areas
=
tf
.
reduce_sum
(
masks
,
axis
=
[
1
,
2
],
keepdims
=
True
)
per_pixel_area
=
masks
*
areas
# Make sure background is ignored in argmin.
per_pixel_area
=
(
masks
*
per_pixel_area
+
(
1
-
masks
)
*
per_pixel_area
.
dtype
.
max
)
min_index
=
tf
.
cast
(
tf
.
argmin
(
per_pixel_area
,
axis
=
0
),
tf
.
int32
)
filtered_masks
=
(
tf
.
range
(
num_instances
)[:,
tf
.
newaxis
,
tf
.
newaxis
]
==
min_index
[
tf
.
newaxis
,
:,
:]
)
return
tf
.
cast
(
filtered_masks
,
tf
.
float32
)
*
masks
return
tf
.
cond
(
num_instances
>
0
,
_filter_min_area
,
lambda
:
masks
)
def
filter_mask_overlap
(
masks
,
method
=
'min_area'
):
if
method
==
'min_area'
:
return
filter_mask_overlap_min_area
(
masks
)
else
:
raise
ValueError
(
'Unknown mask overlap filter type - {}'
.
format
(
method
))
class
CenterNetCornerOffsetTargetAssigner
(
object
):
"""Wrapper to compute corner offsets for boxes using masks."""
def
__init__
(
self
,
stride
,
overlap_resolution
=
'min_area'
):
"""Initializes the corner offset target assigner.
Args:
stride: int, the stride of the network in output pixels.
overlap_resolution: string, specifies how we handle overlapping
instance masks. Currently only 'min_area' is supported which assigns
overlapping pixels to the instance with the minimum area.
"""
self
.
_stride
=
stride
self
.
_overlap_resolution
=
overlap_resolution
def
assign_corner_offset_targets
(
self
,
gt_boxes_list
,
gt_masks_list
):
"""Computes the corner offset targets and foreground map.
For each pixel that is part of any object's foreground, this function
computes the relative offsets to the top-left and bottom-right corners of
that instance's bounding box. It also returns a foreground map to indicate
which pixels contain valid corner offsets.
Args:
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_masks_list: A list of float tensors with shape [num_boxes,
input_height, input_width] with values in {0, 1} representing instance
masks for each object.
Returns:
corner_offsets: A float tensor of shape [batch_size, height, width, 4]
containing, in order, the (y, x) offsets to the top left corner and
the (y, x) offsets to the bottom right corner for each foregroung pixel
foreground: A float tensor of shape [batch_size, height, width] in which
each pixel is set to 1 if it is a part of any instance's foreground
(and thus contains valid corner offsets) and 0 otherwise.
"""
    _, input_height, input_width = (
        shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
    output_height = input_height // self._stride
    output_width = input_width // self._stride

    y_grid, x_grid = tf.meshgrid(
        tf.range(output_height), tf.range(output_width), indexing='ij')
    y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)

    corner_targets = []
    foreground_targets = []
    for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
      gt_masks = _resize_masks(gt_masks, output_height, output_width,
                               method=ResizeMethod.NEAREST_NEIGHBOR)
      gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)

      ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
      ymin, ymax = ymin * output_height, ymax * output_height
      xmin, xmax = xmin * output_width, xmax * output_width

      top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
      left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
      bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
      right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]

      foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
                                  tf.float32)
      foreground_targets.append(foreground_target)

      corner_target = tf.stack([
          tf.reduce_sum(top_y * gt_masks, axis=0),
          tf.reduce_sum(left_x * gt_masks, axis=0),
          tf.reduce_sum(bottom_y * gt_masks, axis=0),
          tf.reduce_sum(right_x * gt_masks, axis=0),
      ], axis=2)

      corner_targets.append(corner_target)

    return (tf.stack(corner_targets, axis=0),
            tf.stack(foreground_targets, axis=0))
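A quick usage sketch of the assigner (assuming TF2 eager execution; the box and mask values are illustrative and mirror the single-object test further down):

import numpy as np
import tensorflow as tf
from object_detection.core import target_assigner

assigner = target_assigner.CenterNetCornerOffsetTargetAssigner(stride=1)
mask = np.zeros((1, 4, 4), dtype=np.float32)
mask[0, 1:3, 1:3] = 1.0  # a 2x2 foreground patch
corner_offsets, foreground = assigner.assign_corner_offset_targets(
    gt_boxes_list=[tf.constant([[0., 0., 1., 1.]])],
    gt_masks_list=[tf.constant(mask)])
# corner_offsets has shape [1, 4, 4, 4]; foreground has shape [1, 4, 4]
# and is 1 exactly on the 2x2 patch.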
research/object_detection/core/target_assigner_test.py
@@ -1906,6 +1906,274 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
         expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):

  def test_assign_part_and_coordinate_targets(self):

    def graph_fn():
      gt_dp_num_points_list = [
          # Example 0.
          tf.constant([2, 0, 3], dtype=tf.int32),
          # Example 1.
          tf.constant([1, 1], dtype=tf.int32),
      ]
      gt_dp_part_ids_list = [
          # Example 0.
          tf.constant([[1, 6, 0], [0, 0, 0], [0, 2, 3]], dtype=tf.int32),
          # Example 1.
          tf.constant([[7, 0, 0], [0, 0, 0]], dtype=tf.int32),
      ]
      gt_dp_surface_coords_list = [
          # Example 0.
          tf.constant(
              [[[0.11, 0.2, 0.3, 0.4],  # Box 0.
                [0.6, 0.4, 0.1, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.0, 0.0, 0.0, 0.0],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.22, 0.1, 0.6, 0.8],  # Box 2.
                [0.0, 0.4, 0.5, 1.0],
                [0.3, 0.2, 0.4, 0.1]]],
              dtype=tf.float32),
          # Example 1.
          tf.constant(
              [[[0.5, 0.5, 0.3, 1.0],  # Box 0.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.2, 0.2, 0.5, 0.8],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]]],
              dtype=tf.float32),
      ]
      gt_weights_list = [
          # Example 0.
          tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
          # Example 1.
          tf.constant([0.0, 1.0], dtype=tf.float32),
      ]
      cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
      batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
          cn_assigner.assign_part_and_coordinate_targets(
              height=120,
              width=80,
              gt_dp_num_points_list=gt_dp_num_points_list,
              gt_dp_part_ids_list=gt_dp_part_ids_list,
              gt_dp_surface_coords_list=gt_dp_surface_coords_list,
              gt_weights_list=gt_weights_list))
      return batch_indices, batch_part_ids, batch_surface_coords, batch_weights

    batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
        self.execute(graph_fn, []))

    expected_batch_indices = np.array([
        # Example 0. e.g.
        # The first set of indices is calculated as follows:
        # floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
        [0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0],
        [0, 0, 0, 0], [0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2],
        [0, 9, 4, 3],
        # Example 1.
        [1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0],
        [1, 0, 0, 0], [1, 0, 0, 0]
    ], dtype=np.int32)
    expected_batch_part_ids = tf.one_hot(
        [1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
    expected_batch_surface_coords = np.array([
        # Box 0.
        [0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
        [0.0, 0.0], [0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
        # Box 1.
        [0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0],
        [0.0, 0.0],
    ], np.float32)
    expected_batch_weights = np.array([
        # Box 0.
        1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
        # Box 1.
        0.0, 0.0, 0.0, 1.0, 0.0, 0.0
    ], dtype=np.float32)
    self.assertAllEqual(expected_batch_indices, batch_indices)
    self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
    self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
    self.assertAllClose(expected_batch_weights, batch_weights)
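The batch indices above follow from mapping normalized DensePose surface-point coordinates onto the stride-4 output grid; the arithmetic for the first point, restated (illustrative values from the test):

import math

stride, height, width = 4, 120, 80
y, x = 0.11, 0.2  # normalized (y, x) of the surface point
row = math.floor(y * height / stride)  # floor(3.3) -> 3
col = math.floor(x * width / stride)   # floor(4.0) -> 4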
class CornerOffsetTargetAssignerTest(test_case.TestCase):

  def test_filter_overlap_min_area_empty(self):
    """Test that empty masks work on CPU."""
    def graph_fn(masks):
      return targetassigner.filter_mask_overlap_min_area(masks)

    masks = self.execute_cpu(graph_fn,
                             [np.zeros((0, 5, 5), dtype=np.float32)])
    self.assertEqual(masks.shape, (0, 5, 5))

  def test_filter_overlap_min_area(self):
    """Test the object with min. area is selected instead of overlap."""
    def graph_fn(masks):
      return targetassigner.filter_mask_overlap_min_area(masks)

    masks = np.zeros((3, 4, 4), dtype=np.float32)
    masks[0, :2, :2] = 1.0
    masks[1, :3, :3] = 1.0
    masks[2, 3, 3] = 1.0

    masks = self.execute(graph_fn, [masks])
    self.assertAllClose(masks[0],
                        [[1, 1, 0, 0],
                         [1, 1, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(masks[1],
                        [[0, 0, 1, 0],
                         [0, 0, 1, 0],
                         [1, 1, 1, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(masks[2],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
  def test_assign_corner_offset_single_object(self):
    """Test that corner offsets are correct with a single object."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [tf.constant([[0., 0., 1., 1.]])]
      mask = np.zeros((1, 4, 4), dtype=np.float32)
      mask[0, 1:3, 1:3] = 1.0
      masks = [tf.constant(mask)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute(graph_fn, [])
    self.assertAllClose(foreground[0],
                        [[0, 0, 0, 0],
                         [0, 1, 1, 0],
                         [0, 1, 1, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 0],
                        [[0, 0, 0, 0],
                         [0, -1, -1, 0],
                         [0, -2, -2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 1],
                        [[0, 0, 0, 0],
                         [0, -1, -2, 0],
                         [0, -1, -2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 2],
                        [[0, 0, 0, 0],
                         [0, 3, 3, 0],
                         [0, 2, 2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 3],
                        [[0, 0, 0, 0],
                         [0, 3, 2, 0],
                         [0, 3, 2, 0],
                         [0, 0, 0, 0]])
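The four channels asserted above are, in order, (ymin - y, xmin - x, ymax - y, xmax - x) in output-pixel units; a worked check for one foreground pixel (values taken from the test):

row, col = 2, 1
ymin, xmin, ymax, xmax = 0., 0., 4., 4.  # the full-image box at stride 1
offsets = (ymin - row, xmin - col, ymax - row, xmax - col)
# -> (-2.0, -1.0, 2.0, 3.0), matching channels 0..3 at that pixel.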
  def test_assign_corner_offset_multiple_objects(self):
    """Test corner offsets are correct with multiple objects."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [
          tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
          tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
      ]
      mask1 = np.zeros((2, 4, 4), dtype=np.float32)
      mask1[0, 0, 0] = 1.0
      mask1[0, 3, 3] = 1.0
      mask2 = np.zeros((2, 4, 4), dtype=np.float32)
      mask2[0, :2, :2] = 1.0
      mask2[1, 1:, 1:] = 1.0
      masks = [tf.constant(mask1), tf.constant(mask2)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute(graph_fn, [])
    self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
    self.assertEqual(foreground.shape, (2, 4, 4))

    self.assertAllClose(foreground[0],
                        [[1, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
    self.assertAllClose(corner_offsets[0, :, :, 0],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, -3]])
    self.assertAllClose(corner_offsets[0, :, :, 1],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, -3]])
    self.assertAllClose(corner_offsets[0, :, :, 2],
                        [[4, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
    self.assertAllClose(corner_offsets[0, :, :, 3],
                        [[4, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])

    self.assertAllClose(foreground[1],
                        [[1, 1, 0, 0],
                         [1, 1, 1, 1],
                         [0, 1, 1, 1],
                         [0, 1, 1, 1]])
    self.assertAllClose(corner_offsets[1, :, :, 0],
                        [[0, 0, 0, 0],
                         [-1, -1, 0, 0],
                         [0, -1, -1, -1],
                         [0, -2, -2, -2]])
    self.assertAllClose(corner_offsets[1, :, :, 1],
                        [[0, -1, 0, 0],
                         [0, -1, -1, -2],
                         [0, 0, -1, -2],
                         [0, 0, -1, -2]])
    self.assertAllClose(corner_offsets[1, :, :, 2],
                        [[1, 1, 0, 0],
                         [0, 0, 3, 3],
                         [0, 2, 2, 2],
                         [0, 1, 1, 1]])
    self.assertAllClose(corner_offsets[1, :, :, 3],
                        [[1, 0, 0, 0],
                         [1, 0, 2, 1],
                         [0, 3, 2, 1],
                         [0, 3, 2, 1]])
  def test_assign_corner_offsets_no_objects(self):
    """Test assignment works with empty input on cpu."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [tf.zeros((0, 4), dtype=tf.float32)]
      masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute_cpu(graph_fn, [])
    self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
    self.assertAllClose(foreground, np.zeros((1, 5, 5)))
if __name__ == '__main__':
  tf.enable_v2_behavior()
  tf.test.main()
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
@@ -50,14 +50,16 @@ import io
 import itertools
 import json
 import os
-from absl import app
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import six
 import tensorflow.compat.v1 as tf

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
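The guarded import makes apache_beam an optional dependency: the module can now be imported without Beam installed and only fails when the pipeline is actually used. A sketch of how a caller might fail fast with a clearer message (hypothetical helper, not part of the diff):

def _require_beam():
  try:
    import apache_beam as beam  # pylint:disable=g-import-not-at-top
  except ModuleNotFoundError as e:
    raise ModuleNotFoundError(
        'This tool requires apache_beam; install it with '
        '"pip install apache-beam".') from e
  return beam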
class ReKeyDataFn(beam.DoFn):
  """Re-keys tfrecords by sequence_key.
@@ -932,4 +934,4 @@ def main(argv=None, save_main_session=True):
 if __name__ == '__main__':
-  app.run(main)
+  main()
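With absl's app.run gone, the entry point calls main() directly, so argument parsing has to happen inside main itself. A sketch of the resulting pattern, assuming argparse (the exact flags live in the file):

def main(argv=None, save_main_session=True):
  parser = argparse.ArgumentParser()
  # ... tool-specific flags ...
  args, pipeline_args = parser.parse_known_args(argv)
  # Build and run the Beam pipeline with the parsed arguments.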
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
@@ -22,7 +22,7 @@ import datetime
 import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
@@ -31,6 +31,12 @@ from object_detection.dataset_tools.context_rcnn import add_context_to_examples
 from object_detection.utils import tf_version

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
  temp = tempfile.NamedTemporaryFile(delete=False)
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
@@ -39,13 +39,16 @@ import io
 import json
 import logging
 import os
-from absl import app
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import tensorflow.compat.v1 as tf

 from object_detection.utils import dataset_util

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
class ParseImage(beam.DoFn):
  """A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
@@ -338,4 +341,4 @@ def main(argv=None, save_main_session=True):
 if __name__ == '__main__':
-  app.run(main)
+  main()
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
@@ -22,7 +22,6 @@ import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 from PIL import Image
@@ -30,6 +29,11 @@ import tensorflow.compat.v1 as tf
 from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
 from object_detection.utils import tf_version

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
@@ -48,9 +48,11 @@ from __future__ import print_function
 import argparse
 import os
 import threading
-from absl import app
-import apache_beam as beam
 import tensorflow.compat.v1 as tf

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
class GenerateDetectionDataFn(beam.DoFn):
@@ -290,4 +292,4 @@ def main(argv=None, save_main_session=True):
 if __name__ == '__main__':
-  app.run(main)
+  main()
research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
@@ -22,7 +22,6 @@ import contextlib
 import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
@@ -39,6 +38,11 @@ if six.PY2:
 else:
   mock = unittest.mock

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
class FakeModel(model.DetectionModel):
  """A Fake Detection model with expected output nodes from post-processing."""
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
     --input_type tf_example \
     --pipeline_config_path path/to/faster_rcnn_model.config \
     --trained_checkpoint_prefix path/to/model.ckpt \
-    --output_directory path/to/exported_model_directory
+    --output_directory path/to/exported_model_directory \
+    --additional_output_tensor_names detection_features

 python generate_embedding_data.py \
     --alsologtostderr \
     ...
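The added --additional_output_tensor_names detection_features flag matters here: presumably the export step must be told to expose the detection_features tensor that generate_embedding_data.py consumes when building embeddings.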
@@ -52,13 +53,15 @@ import datetime
 import os
 import threading
-from absl import app
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
class GenerateEmbeddingDataFn(beam.DoFn):
  """Generates embedding data for camera trap images.
@@ -410,5 +413,7 @@ def main(argv=None, save_main_session=True):
   p.run()

 if __name__ == '__main__':
-  app.run(main)
+  main()