Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
0c85c06c
Commit
0c85c06c
authored
Feb 24, 2021
by
Yu-hui Chen
Committed by
TF Object Detection Team
Feb 24, 2021
Browse files
Extended CenterNet model to predict keypoint depth information.
PiperOrigin-RevId: 359344675
parent
3cfd0ba0
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
637 additions
and
97 deletions
+637
-97
research/object_detection/builders/model_builder.py
research/object_detection/builders/model_builder.py
+4
-1
research/object_detection/builders/model_builder_tf2_test.py
research/object_detection/builders/model_builder_tf2_test.py
+6
-0
research/object_detection/meta_architectures/center_net_meta_arch.py
...ject_detection/meta_architectures/center_net_meta_arch.py
+289
-59
research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
...ction/meta_architectures/center_net_meta_arch_tf2_test.py
+323
-36
research/object_detection/protos/center_net.proto
research/object_detection/protos/center_net.proto
+15
-1
No files found.
research/object_detection/builders/model_builder.py
View file @
0c85c06c
...
...
@@ -868,7 +868,10 @@ def keypoint_proto_to_params(kp_config, keypoint_map_dict):
candidate_search_scale
=
kp_config
.
candidate_search_scale
,
candidate_ranking_mode
=
kp_config
.
candidate_ranking_mode
,
offset_peak_radius
=
kp_config
.
offset_peak_radius
,
per_keypoint_offset
=
kp_config
.
per_keypoint_offset
)
per_keypoint_offset
=
kp_config
.
per_keypoint_offset
,
predict_depth
=
kp_config
.
predict_depth
,
per_keypoint_depth
=
kp_config
.
per_keypoint_depth
,
keypoint_depth_loss_weight
=
kp_config
.
keypoint_depth_loss_weight
)
def
object_detection_proto_to_params
(
od_config
):
...
...
research/object_detection/builders/model_builder_tf2_test.py
View file @
0c85c06c
...
...
@@ -116,6 +116,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
candidate_ranking_mode: "score_distance_ratio"
offset_peak_radius: 3
per_keypoint_offset: true
predict_depth: true
per_keypoint_depth: true
keypoint_depth_loss_weight: 0.3
"""
config
=
text_format
.
Merge
(
task_proto_txt
,
center_net_pb2
.
CenterNet
.
KeypointEstimation
())
...
...
@@ -264,6 +267,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
self
.
assertEqual
(
kp_params
.
candidate_ranking_mode
,
'score_distance_ratio'
)
self
.
assertEqual
(
kp_params
.
offset_peak_radius
,
3
)
self
.
assertEqual
(
kp_params
.
per_keypoint_offset
,
True
)
self
.
assertEqual
(
kp_params
.
predict_depth
,
True
)
self
.
assertEqual
(
kp_params
.
per_keypoint_depth
,
True
)
self
.
assertAlmostEqual
(
kp_params
.
keypoint_depth_loss_weight
,
0.3
)
# Check mask related parameters.
self
.
assertAlmostEqual
(
model
.
_mask_params
.
task_loss_weight
,
0.7
)
...
...
research/object_detection/meta_architectures/center_net_meta_arch.py
View file @
0c85c06c
...
...
@@ -423,12 +423,12 @@ def prediction_tensors_to_temporal_offsets(
return
offsets
def
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap_
prediction
s
,
keypoint_heatmap_offsets
,
keypoint_score_threshold
=
0.
1
,
max_pool_kernel_size
=
1
,
max_candidates
=
20
):
def
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap_predictions
,
keypoint_heatmap_
offset
s
,
keypoint_score_threshold
=
0.1
,
max_pool_kernel_size
=
1
,
max_candidates
=
20
,
keypoint_depths
=
None
):
"""Convert keypoint heatmap predictions and offsets to keypoint candidates.
Args:
...
...
@@ -437,14 +437,17 @@ def prediction_tensors_to_keypoint_candidates(
keypoint_heatmap_offsets: A float tensor of shape [batch_size, height,
width, 2] (or [batch_size, height, width, 2 * num_keypoints] if
'per_keypoint_offset' is set True) representing the per-keypoint offsets.
keypoint_score_threshold: float, the threshold for considering a keypoint
a
candidate.
keypoint_score_threshold: float, the threshold for considering a keypoint
a
candidate.
max_pool_kernel_size: integer, the max pool kernel size to use to pull off
peak score locations in a neighborhood. For example, to make sure no two
neighboring values for the same keypoint are returned, set
max_pool_kernel_size=3. If None or 1, will not apply any local filtering.
max_candidates: integer, maximum number of keypoint candidates per
keypoint type.
max_candidates: integer, maximum number of keypoint candidates per keypoint
type.
keypoint_depths: (optional) A float tensor of shape [batch_size, height,
width, 1] (or [batch_size, height, width, num_keypoints] if
'per_keypoint_depth' is set True) representing the per-keypoint depths.
Returns:
keypoint_candidates: A tensor of shape
...
...
@@ -458,6 +461,9 @@ def prediction_tensors_to_keypoint_candidates(
[batch_size, num_keypoints] with the number of candidates for each
keypoint type, as it's possible to filter some candidates due to the score
threshold.
depth_candidates: A tensor of shape [batch_size, max_candidates,
num_keypoints] representing the estimated depth of each keypoint
candidate. Return None if the input keypoint_depths is None.
"""
batch_size
,
_
,
_
,
num_keypoints
=
_get_shape
(
keypoint_heatmap_predictions
,
4
)
# Get x, y and channel indices corresponding to the top indices in the
...
...
@@ -499,13 +505,13 @@ def prediction_tensors_to_keypoint_candidates(
# TF Lite does not support tf.gather with batch_dims > 0, so we need to use
# tf_gather_nd instead and here we prepare the indices for that. In this
# case, channel_indices indicates which keypoint to use the offset from.
combined_indices
=
tf
.
stack
([
channel_
combined_indices
=
tf
.
stack
([
_multi_range
(
batch_size
,
value_repetitions
=
num_indices
),
_multi_range
(
num_indices
,
range_repetitions
=
batch_size
),
tf
.
reshape
(
channel_indices
,
[
-
1
])
],
axis
=
1
)
offsets
=
tf
.
gather_nd
(
reshaped_offsets
,
combined_indices
)
offsets
=
tf
.
gather_nd
(
reshaped_offsets
,
channel_
combined_indices
)
offsets
=
tf
.
reshape
(
offsets
,
[
batch_size
,
num_indices
,
-
1
])
else
:
offsets
=
selected_offsets
...
...
@@ -524,14 +530,38 @@ def prediction_tensors_to_keypoint_candidates(
num_candidates
=
tf
.
reduce_sum
(
tf
.
to_int32
(
keypoint_scores
>=
keypoint_score_threshold
),
axis
=
1
)
return
keypoint_candidates
,
keypoint_scores
,
num_candidates
depth_candidates
=
None
if
keypoint_depths
is
not
None
:
selected_depth_flat
=
tf
.
gather_nd
(
keypoint_depths
,
combined_indices
)
selected_depth
=
tf
.
reshape
(
selected_depth_flat
,
[
batch_size
,
num_indices
,
-
1
])
_
,
_
,
num_depth_channels
=
_get_shape
(
selected_depth
,
3
)
if
num_depth_channels
>
1
:
combined_indices
=
tf
.
stack
([
_multi_range
(
batch_size
,
value_repetitions
=
num_indices
),
_multi_range
(
num_indices
,
range_repetitions
=
batch_size
),
tf
.
reshape
(
channel_indices
,
[
-
1
])
],
axis
=
1
)
depth
=
tf
.
gather_nd
(
selected_depth
,
combined_indices
)
depth
=
tf
.
reshape
(
depth
,
[
batch_size
,
num_indices
,
-
1
])
else
:
depth
=
selected_depth
depth_candidates
=
tf
.
reshape
(
depth
,
[
batch_size
,
num_keypoints
,
max_candidates
])
depth_candidates
=
tf
.
transpose
(
depth_candidates
,
[
0
,
2
,
1
])
return
keypoint_candidates
,
keypoint_scores
,
num_candidates
,
depth_candidates
def
prediction_to_single_instance_keypoints
(
object_heatmap
,
keypoint_heatmap
,
def
prediction_to_single_instance_keypoints
(
object_heatmap
,
keypoint_heatmap
,
keypoint_offset
,
keypoint_regression
,
stride
,
keypoint_regression
,
stride
,
object_center_std_dev
,
keypoint_std_dev
,
kp_params
):
keypoint_std_dev
,
kp_params
,
keypoint_depths
=
None
):
"""Postprocess function to predict single instance keypoints.
This is a simplified postprocessing function based on the assumption that
...
...
@@ -569,6 +599,9 @@ def prediction_to_single_instance_keypoints(object_heatmap, keypoint_heatmap,
representing the standard deviation corresponding to each joint.
kp_params: A `KeypointEstimationParams` object with parameters for a single
keypoint class.
keypoint_depths: (optional) A float tensor of shape [batch_size, height,
width, 1] (or [batch_size, height, width, num_keypoints] if
'per_keypoint_depth' is set True) representing the per-keypoint depths.
Returns:
A tuple of three tensors (the last one is None if keypoint_depths is None):
...
...
@@ -577,6 +610,9 @@ def prediction_to_single_instance_keypoints(object_heatmap, keypoint_heatmap,
map space.
keypoint_scores: A float tensor with shape [1, 1, num_keypoints]
representing the keypoint prediction scores.
keypoint_depths: A float tensor with shape [1, 1, num_keypoints]
representing the estimated keypoint depths. Return None if the input
keypoint_depths is None.
Raises:
ValueError: if the input keypoint_std_dev doesn't have valid number of
...
...
@@ -636,14 +672,16 @@ def prediction_to_single_instance_keypoints(object_heatmap, keypoint_heatmap,
# Get the keypoint locations/scores:
# keypoint_candidates: [1, 1, num_keypoints, 2]
# keypoint_scores: [1, 1, num_keypoints]
(
keypoint_candidates
,
keypoint_scores
,
_
)
=
prediction_tensors_to_keypoint_candidates
(
# depth_candidates: [1, 1, num_keypoints]
(
keypoint_candidates
,
keypoint_scores
,
_
,
depth_candidates
)
=
prediction_tensors_to_keypoint_candidates
(
keypoint_predictions
,
keypoint_offset
,
keypoint_score_threshold
=
kp_params
.
keypoint_candidate_score_threshold
,
max_pool_kernel_size
=
kp_params
.
peak_max_pool_kernel_size
,
max_candidates
=
1
)
return
keypoint_candidates
,
keypoint_scores
max_candidates
=
1
,
keypoint_depths
=
keypoint_depths
)
return
keypoint_candidates
,
keypoint_scores
,
depth_candidates
def
regressed_keypoints_at_object_centers
(
regressed_keypoint_predictions
,
...
...
@@ -697,11 +735,16 @@ def regressed_keypoints_at_object_centers(regressed_keypoint_predictions,
[
batch_size
,
num_instances
,
-
1
])
def
refine_keypoints
(
regressed_keypoints
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
None
,
unmatched_keypoint_score
=
0.1
,
box_scale
=
1.2
,
def
refine_keypoints
(
regressed_keypoints
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
None
,
unmatched_keypoint_score
=
0.1
,
box_scale
=
1.2
,
candidate_search_scale
=
0.3
,
candidate_ranking_mode
=
'min_distance'
):
candidate_ranking_mode
=
'min_distance'
,
keypoint_depth_candidates
=
None
):
"""Refines regressed keypoints by snapping to the nearest candidate keypoints.
The initial regressed keypoints represent a full set of keypoints regressed
...
...
@@ -757,6 +800,9 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores,
candidate_ranking_mode: A string as one of ['min_distance',
'score_distance_ratio'] indicating how to select the candidate. If invalid
value is provided, a ValueError will be raised.
keypoint_depth_candidates: (optional) A float tensor of shape
[batch_size, max_candidates, num_keypoints] indicating the depths for
keypoint candidates.
Returns:
A tuple with:
...
...
@@ -836,9 +882,11 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores,
# Gather the coordinates and scores corresponding to the closest candidates.
# Shape of tensors are [batch_size, num_instances, num_keypoints, 2] and
# [batch_size, num_instances, num_keypoints], respectively.
nearby_candidate_coords
,
nearby_candidate_scores
=
(
_gather_candidates_at_indices
(
keypoint_candidates
,
keypoint_scores
,
nearby_candidate_inds
))
(
nearby_candidate_coords
,
nearby_candidate_scores
,
nearby_candidate_depths
)
=
(
_gather_candidates_at_indices
(
keypoint_candidates
,
keypoint_scores
,
nearby_candidate_inds
,
keypoint_depth_candidates
))
if
bboxes
is
None
:
# Create bboxes from regressed keypoints.
...
...
@@ -895,7 +943,12 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores,
unmatched_keypoint_score
*
tf
.
ones_like
(
nearby_candidate_scores
),
nearby_candidate_scores
)
return
refined_keypoints
,
refined_scores
refined_depths
=
None
if
nearby_candidate_depths
is
not
None
:
refined_depths
=
tf
.
where
(
mask
,
tf
.
zeros_like
(
nearby_candidate_depths
),
nearby_candidate_depths
)
return
refined_keypoints
,
refined_scores
,
refined_depths
def
_pad_to_full_keypoint_dim
(
keypoint_coords
,
keypoint_scores
,
keypoint_inds
,
...
...
@@ -976,8 +1029,10 @@ def _pad_to_full_instance_dim(keypoint_coords, keypoint_scores, instance_inds,
return
keypoint_coords_padded
,
keypoint_scores_padded
def
_gather_candidates_at_indices
(
keypoint_candidates
,
keypoint_scores
,
indices
):
def
_gather_candidates_at_indices
(
keypoint_candidates
,
keypoint_scores
,
indices
,
keypoint_depth_candidates
=
None
):
"""Gathers keypoint candidate coordinates and scores at indices.
Args:
...
...
@@ -987,13 +1042,18 @@ def _gather_candidates_at_indices(keypoint_candidates, keypoint_scores,
num_keypoints] with keypoint scores.
indices: an integer tensor of shape [batch_size, num_indices, num_keypoints]
with indices.
keypoint_depth_candidates: (optional) a float tensor of shape [batch_size,
max_candidates, num_keypoints] with keypoint depths.
Returns:
A tuple with
gathered_keypoint_candidates: a float tensor of shape [batch_size,
num_indices, num_keypoints, 2] with gathered coordinates.
gathered_keypoint_scores: a float tensor of shape [batch_size,
num_indices, num_keypoints, 2].
num_indices, num_keypoints].
gathered_keypoint_depths: a float tensor of shape [batch_size,
num_indices, num_keypoints]. Return None if the input
keypoint_depth_candidates is None.
"""
batch_size
,
num_indices
,
num_keypoints
=
_get_shape
(
indices
,
3
)
...
...
@@ -1035,7 +1095,19 @@ def _gather_candidates_at_indices(keypoint_candidates, keypoint_scores,
gathered_keypoint_scores
=
tf
.
transpose
(
nearby_candidate_scores_transposed
,
[
0
,
2
,
1
])
return
gathered_keypoint_candidates
,
gathered_keypoint_scores
gathered_keypoint_depths
=
None
if
keypoint_depth_candidates
is
not
None
:
keypoint_depths_transposed
=
tf
.
transpose
(
keypoint_depth_candidates
,
[
0
,
2
,
1
])
nearby_candidate_depths_transposed
=
tf
.
gather_nd
(
keypoint_depths_transposed
,
combined_indices
)
nearby_candidate_depths_transposed
=
tf
.
reshape
(
nearby_candidate_depths_transposed
,
[
batch_size
,
num_keypoints
,
num_indices
])
gathered_keypoint_depths
=
tf
.
transpose
(
nearby_candidate_depths_transposed
,
[
0
,
2
,
1
])
return
(
gathered_keypoint_candidates
,
gathered_keypoint_scores
,
gathered_keypoint_depths
)
def
flattened_indices_from_row_col_indices
(
row_indices
,
col_indices
,
num_cols
):
...
...
@@ -1517,7 +1589,8 @@ class KeypointEstimationParams(
'heatmap_bias_init'
,
'num_candidates_per_keypoint'
,
'task_loss_weight'
,
'peak_max_pool_kernel_size'
,
'unmatched_keypoint_score'
,
'box_scale'
,
'candidate_search_scale'
,
'candidate_ranking_mode'
,
'offset_peak_radius'
,
'per_keypoint_offset'
'offset_peak_radius'
,
'per_keypoint_offset'
,
'predict_depth'
,
'per_keypoint_depth'
,
'keypoint_depth_loss_weight'
])):
"""Namedtuple to host object detection related parameters.
...
...
@@ -1550,7 +1623,10 @@ class KeypointEstimationParams(
candidate_search_scale
=
0.3
,
candidate_ranking_mode
=
'min_distance'
,
offset_peak_radius
=
0
,
per_keypoint_offset
=
False
):
per_keypoint_offset
=
False
,
predict_depth
=
False
,
per_keypoint_depth
=
False
,
keypoint_depth_loss_weight
=
1.0
):
"""Constructor with default values for KeypointEstimationParams.
Args:
...
...
@@ -1614,6 +1690,12 @@ class KeypointEstimationParams(
original paper). If set True, the output offset target has the shape
[batch_size, out_height, out_width, 2 * num_keypoints] (recommended when
the offset_peak_radius is not zero).
predict_depth: A bool indicating whether to predict the depth of each
keypoint.
per_keypoint_depth: A bool indicating whether the model predicts the depth
of each keypoint in independent channels. Similar to
per_keypoint_offset but for the keypoint depth.
keypoint_depth_loss_weight: The weight of the keypoint depth loss.
Returns:
An initialized KeypointEstimationParams namedtuple.
...
...
@@ -1626,7 +1708,8 @@ class KeypointEstimationParams(
heatmap_bias_init
,
num_candidates_per_keypoint
,
task_loss_weight
,
peak_max_pool_kernel_size
,
unmatched_keypoint_score
,
box_scale
,
candidate_search_scale
,
candidate_ranking_mode
,
offset_peak_radius
,
per_keypoint_offset
)
per_keypoint_offset
,
predict_depth
,
per_keypoint_depth
,
keypoint_depth_loss_weight
)
class
ObjectCenterParams
(
...
...
@@ -1839,6 +1922,7 @@ BOX_OFFSET = 'box/offset'
KEYPOINT_REGRESSION
=
'keypoint/regression'
KEYPOINT_HEATMAP
=
'keypoint/heatmap'
KEYPOINT_OFFSET
=
'keypoint/offset'
KEYPOINT_DEPTH
=
'keypoint/depth'
SEGMENTATION_TASK
=
'segmentation_task'
SEGMENTATION_HEATMAP
=
'segmentation/heatmap'
DENSEPOSE_TASK
=
'densepose_task'
...
...
@@ -2055,6 +2139,15 @@ class CenterNetMetaArch(model.DetectionModel):
use_depthwise
=
self
.
_use_depthwise
)
for
_
in
range
(
num_feature_outputs
)
]
if
kp_params
.
predict_depth
:
num_depth_channel
=
(
num_keypoints
if
kp_params
.
per_keypoint_depth
else
1
)
prediction_heads
[
get_keypoint_name
(
task_name
,
KEYPOINT_DEPTH
)]
=
[
make_prediction_net
(
num_depth_channel
,
use_depthwise
=
self
.
_use_depthwise
)
for
_
in
range
(
num_feature_outputs
)
]
# pylint: disable=g-complex-comprehension
if
self
.
_mask_params
is
not
None
:
prediction_heads
[
SEGMENTATION_HEATMAP
]
=
[
...
...
@@ -2305,6 +2398,7 @@ class CenterNetMetaArch(model.DetectionModel):
heatmap_key
=
get_keypoint_name
(
task_name
,
KEYPOINT_HEATMAP
)
offset_key
=
get_keypoint_name
(
task_name
,
KEYPOINT_OFFSET
)
regression_key
=
get_keypoint_name
(
task_name
,
KEYPOINT_REGRESSION
)
depth_key
=
get_keypoint_name
(
task_name
,
KEYPOINT_DEPTH
)
heatmap_loss
=
self
.
_compute_kp_heatmap_loss
(
input_height
=
input_height
,
input_width
=
input_width
,
...
...
@@ -2332,6 +2426,14 @@ class CenterNetMetaArch(model.DetectionModel):
kp_params
.
keypoint_offset_loss_weight
*
offset_loss
)
loss_dict
[
regression_key
]
=
(
kp_params
.
keypoint_regression_loss_weight
*
reg_loss
)
if
kp_params
.
predict_depth
:
depth_loss
=
self
.
_compute_kp_depth_loss
(
input_height
=
input_height
,
input_width
=
input_width
,
task_name
=
task_name
,
depth_predictions
=
prediction_dict
[
depth_key
],
localization_loss_fn
=
kp_params
.
localization_loss
)
loss_dict
[
depth_key
]
=
kp_params
.
keypoint_depth_loss_weight
*
depth_loss
return
loss_dict
def
_compute_kp_heatmap_loss
(
self
,
input_height
,
input_width
,
task_name
,
...
...
@@ -2501,6 +2603,68 @@ class CenterNetMetaArch(model.DetectionModel):
tf
.
maximum
(
tf
.
reduce_sum
(
batch_weights
),
1.0
))
return
loss
def
_compute_kp_depth_loss
(
self
,
input_height
,
input_width
,
task_name
,
depth_predictions
,
localization_loss_fn
):
"""Computes the loss of the keypoint depth estimation.
Args:
input_height: An integer scalar tensor representing input image height.
input_width: An integer scalar tensor representing input image width.
task_name: A string representing the name of the keypoint task.
depth_predictions: A list of float tensors of shape [batch_size,
out_height, out_width, 1 (or num_keypoints)] representing the prediction
heads of the model for keypoint depth.
localization_loss_fn: An object_detection.core.losses.Loss object to
compute the loss for the keypoint depth predictions in CenterNet.
Returns:
loss: A float scalar tensor representing the keypoint depth loss
normalized by number of total keypoints.
"""
kp_params
=
self
.
_kp_params_dict
[
task_name
]
gt_keypoints_list
=
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
keypoints
)
gt_classes_list
=
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
classes
)
gt_weights_list
=
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
weights
)
gt_keypoint_depths_list
=
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
keypoint_depths
)
gt_keypoint_depth_weights_list
=
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
keypoint_depth_weights
)
assigner
=
self
.
_target_assigner_dict
[
task_name
]
(
batch_indices
,
batch_depths
,
batch_weights
)
=
assigner
.
assign_keypoints_depth_targets
(
height
=
input_height
,
width
=
input_width
,
gt_keypoints_list
=
gt_keypoints_list
,
gt_weights_list
=
gt_weights_list
,
gt_classes_list
=
gt_classes_list
,
gt_keypoint_depths_list
=
gt_keypoint_depths_list
,
gt_keypoint_depth_weights_list
=
gt_keypoint_depth_weights_list
)
if
kp_params
.
per_keypoint_offset
and
not
kp_params
.
per_keypoint_depth
:
batch_indices
=
batch_indices
[:,
0
:
3
]
# Keypoint depth loss.
loss
=
0.0
for
prediction
in
depth_predictions
:
selected_depths
=
cn_assigner
.
get_batch_predictions_from_indices
(
prediction
,
batch_indices
)
if
kp_params
.
per_keypoint_offset
and
kp_params
.
per_keypoint_depth
:
selected_depths
=
tf
.
expand_dims
(
selected_depths
,
axis
=-
1
)
# The dimensions passed are not as per the doc string but the loss
# still computes the correct value.
unweighted_loss
=
localization_loss_fn
(
selected_depths
,
batch_depths
,
weights
=
tf
.
expand_dims
(
tf
.
ones_like
(
batch_weights
),
-
1
))
# Apply the weights after the loss function to have full control over it.
loss
+=
batch_weights
*
tf
.
squeeze
(
unweighted_loss
,
axis
=
1
)
loss
=
tf
.
reduce_sum
(
loss
)
/
(
float
(
len
(
depth_predictions
))
*
tf
.
maximum
(
tf
.
reduce_sum
(
batch_weights
),
1.0
))
return
loss
def
_compute_segmentation_losses
(
self
,
prediction_dict
,
per_pixel_weights
):
"""Computes all the losses associated with segmentation.
...
...
@@ -3051,9 +3215,10 @@ class CenterNetMetaArch(model.DetectionModel):
# keypoint, we fall back to a simpler postprocessing function which uses
# the ops that are supported by tf.lite on GPU.
if
len
(
self
.
_kp_params_dict
)
==
1
and
self
.
_num_classes
==
1
:
keypoints
,
keypoint_scores
=
self
.
_postprocess_keypoints_single_class
(
prediction_dict
,
classes
,
y_indices
,
x_indices
,
boxes_strided
,
num_detections
)
(
keypoints
,
keypoint_scores
,
keypoint_depths
)
=
self
.
_postprocess_keypoints_single_class
(
prediction_dict
,
classes
,
y_indices
,
x_indices
,
boxes_strided
,
num_detections
)
# The map_fn used to clip out-of-frame keypoints creates issues when
# converting to a tf.lite model, so we disable it and let the users
# handle those out-of-frame keypoints.
...
...
@@ -3061,7 +3226,18 @@ class CenterNetMetaArch(model.DetectionModel):
convert_strided_predictions_to_normalized_keypoints
(
keypoints
,
keypoint_scores
,
self
.
_stride
,
true_image_shapes
,
clip_out_of_frame_keypoints
=
False
))
if
keypoint_depths
is
not
None
:
postprocess_dict
.
update
({
fields
.
DetectionResultFields
.
detection_keypoint_depths
:
keypoint_depths
})
else
:
# Multi-class keypoint estimation task does not support depth
# estimation.
assert
all
([
not
kp_dict
.
predict_depth
for
kp_dict
in
self
.
_kp_params_dict
.
values
()
])
keypoints
,
keypoint_scores
=
self
.
_postprocess_keypoints_multi_class
(
prediction_dict
,
classes
,
y_indices
,
x_indices
,
boxes_strided
,
num_detections
)
...
...
@@ -3200,7 +3376,11 @@ class CenterNetMetaArch(model.DetectionModel):
task_name
,
KEYPOINT_REGRESSION
)][
-
1
]
object_heatmap
=
tf
.
nn
.
sigmoid
(
prediction_dict
[
OBJECT_CENTER
][
-
1
])
keypoints
,
keypoint_scores
=
(
keypoint_depths
=
None
if
kp_params
.
predict_depth
:
keypoint_depths
=
prediction_dict
[
get_keypoint_name
(
task_name
,
KEYPOINT_DEPTH
)][
-
1
]
keypoints
,
keypoint_scores
,
keypoint_depths
=
(
prediction_to_single_instance_keypoints
(
object_heatmap
=
object_heatmap
,
keypoint_heatmap
=
keypoint_heatmap
,
...
...
@@ -3209,7 +3389,8 @@ class CenterNetMetaArch(model.DetectionModel):
stride
=
self
.
_stride
,
object_center_std_dev
=
object_center_std_dev
,
keypoint_std_dev
=
keypoint_std_dev
,
kp_params
=
kp_params
))
kp_params
=
kp_params
,
keypoint_depths
=
keypoint_depths
))
keypoints
,
keypoint_scores
=
(
convert_strided_predictions_to_normalized_keypoints
(
...
...
@@ -3222,6 +3403,12 @@ class CenterNetMetaArch(model.DetectionModel):
fields
.
DetectionResultFields
.
detection_keypoints
:
keypoints
,
fields
.
DetectionResultFields
.
detection_keypoint_scores
:
keypoint_scores
}
if
kp_params
.
predict_depth
:
postprocess_dict
.
update
({
fields
.
DetectionResultFields
.
detection_keypoint_depths
:
keypoint_depths
})
return
postprocess_dict
def
_postprocess_embeddings
(
self
,
prediction_dict
,
y_indices
,
x_indices
):
...
...
@@ -3316,7 +3503,7 @@ class CenterNetMetaArch(model.DetectionModel):
# [1, num_instances_i, num_keypoints_i], respectively. Note that
# num_instances_i and num_keypoints_i refers to the number of
# instances and keypoints for class i, respectively.
kpt_coords_for_class
,
kpt_scores_for_class
=
(
(
kpt_coords_for_class
,
kpt_scores_for_class
,
_
)
=
(
self
.
_postprocess_keypoints_for_class_and_image
(
keypoint_heatmap
,
keypoint_offsets
,
keypoint_regression
,
classes
,
y_indices_for_kpt_class
,
x_indices_for_kpt_class
,
...
...
@@ -3426,21 +3613,35 @@ class CenterNetMetaArch(model.DetectionModel):
get_keypoint_name
(
task_name
,
KEYPOINT_OFFSET
)][
-
1
]
keypoint_regression
=
prediction_dict
[
get_keypoint_name
(
task_name
,
KEYPOINT_REGRESSION
)][
-
1
]
keypoint_depth_predictions
=
None
if
kp_params
.
predict_depth
:
keypoint_depth_predictions
=
prediction_dict
[
get_keypoint_name
(
task_name
,
KEYPOINT_DEPTH
)][
-
1
]
batch_size
,
_
,
_
=
_get_shape
(
boxes
,
3
)
kpt_coords_for_example_list
=
[]
kpt_scores_for_example_list
=
[]
kpt_depths_for_example_list
=
[]
for
ex_ind
in
range
(
batch_size
):
# Postprocess keypoints and scores for class and single image. Shapes
# are [1, max_detections, num_keypoints, 2] and
# [1, max_detections, num_keypoints], respectively.
kpt_coords_for_class
,
kpt_scores_for_class
=
(
(
kpt_coords_for_class
,
kpt_scores_for_class
,
kpt_depths_for_class
)
=
(
self
.
_postprocess_keypoints_for_class_and_image
(
keypoint_heatmap
,
keypoint_offsets
,
keypoint_regression
,
classes
,
y_indices
,
x_indices
,
boxes
,
ex_ind
,
kp_params
))
keypoint_heatmap
,
keypoint_offsets
,
keypoint_regression
,
classes
,
y_indices
,
x_indices
,
boxes
,
ex_ind
,
kp_params
,
keypoint_depth_predictions
=
keypoint_depth_predictions
))
kpt_coords_for_example_list
.
append
(
kpt_coords_for_class
)
kpt_scores_for_example_list
.
append
(
kpt_scores_for_class
)
kpt_depths_for_example_list
.
append
(
kpt_depths_for_class
)
# Concatenate all keypoints and scores from all examples in the batch.
# Shapes are [batch_size, max_detections, num_keypoints, 2] and
...
...
@@ -3448,7 +3649,11 @@ class CenterNetMetaArch(model.DetectionModel):
keypoints
=
tf
.
concat
(
kpt_coords_for_example_list
,
axis
=
0
)
keypoint_scores
=
tf
.
concat
(
kpt_scores_for_example_list
,
axis
=
0
)
return
keypoints
,
keypoint_scores
keypoint_depths
=
None
if
kp_params
.
predict_depth
:
keypoint_depths
=
tf
.
concat
(
kpt_depths_for_example_list
,
axis
=
0
)
return
keypoints
,
keypoint_scores
,
keypoint_depths
def
_get_instance_indices
(
self
,
classes
,
num_detections
,
batch_index
,
class_id
):
...
...
@@ -3482,8 +3687,17 @@ class CenterNetMetaArch(model.DetectionModel):
return
tf
.
cast
(
instance_inds
,
tf
.
int32
)
def
_postprocess_keypoints_for_class_and_image
(
self
,
keypoint_heatmap
,
keypoint_offsets
,
keypoint_regression
,
classes
,
y_indices
,
x_indices
,
boxes
,
batch_index
,
kp_params
):
self
,
keypoint_heatmap
,
keypoint_offsets
,
keypoint_regression
,
classes
,
y_indices
,
x_indices
,
boxes
,
batch_index
,
kp_params
,
keypoint_depth_predictions
=
None
):
"""Postprocess keypoints for a single image and class.
Args:
...
...
@@ -3504,6 +3718,8 @@ class CenterNetMetaArch(model.DetectionModel):
batch_index: An integer specifying the index for an example in the batch.
kp_params: A `KeypointEstimationParams` object with parameters for a
single keypoint class.
keypoint_depth_predictions: (optional) A [batch_size, height, width, 1]
(or [batch_size, height, width, num_keypoints] if 'per_keypoint_depth' is
set True) float32 tensor representing the keypoint depth prediction.
Returns:
A tuple of
...
...
@@ -3514,6 +3730,9 @@ class CenterNetMetaArch(model.DetectionModel):
for the specific class.
refined_scores: A [1, num_instances, num_keypoints] float32 tensor with
keypoint scores.
refined_depths: A [1, num_instances, num_keypoints] float32 tensor with
keypoint depths. Return None if the input keypoint_depth_predictions is
None.
"""
num_keypoints
=
len
(
kp_params
.
keypoint_indices
)
...
...
@@ -3521,6 +3740,10 @@ class CenterNetMetaArch(model.DetectionModel):
keypoint_heatmap
[
batch_index
:
batch_index
+
1
,
...])
keypoint_offsets
=
keypoint_offsets
[
batch_index
:
batch_index
+
1
,
...]
keypoint_regression
=
keypoint_regression
[
batch_index
:
batch_index
+
1
,
...]
keypoint_depths
=
None
if
keypoint_depth_predictions
is
not
None
:
keypoint_depths
=
keypoint_depth_predictions
[
batch_index
:
batch_index
+
1
,
...]
y_indices
=
y_indices
[
batch_index
:
batch_index
+
1
,
...]
x_indices
=
x_indices
[
batch_index
:
batch_index
+
1
,
...]
boxes_slice
=
boxes
[
batch_index
:
batch_index
+
1
,
...]
...
...
@@ -3536,26 +3759,33 @@ class CenterNetMetaArch(model.DetectionModel):
# The shape of keypoint_candidates and keypoint_scores is:
# [1, num_candidates_per_keypoint, num_keypoints, 2] and
# [1, num_candidates_per_keypoint, num_keypoints], respectively.
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
=
(
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap
,
keypoint_offsets
,
keypoint_score_threshold
=
(
kp_params
.
keypoint_candidate_score_threshold
),
max_pool_kernel_size
=
kp_params
.
peak_max_pool_kernel_size
,
max_candidates
=
kp_params
.
num_candidates_per_keypoint
))
(
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
keypoint_depth_candidates
)
=
(
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap
,
keypoint_offsets
,
keypoint_score_threshold
=
(
kp_params
.
keypoint_candidate_score_threshold
),
max_pool_kernel_size
=
kp_params
.
peak_max_pool_kernel_size
,
max_candidates
=
kp_params
.
num_candidates_per_keypoint
,
keypoint_depths
=
keypoint_depths
))
# Get the refined keypoints and scores, of shape
# [1, num_instances, num_keypoints, 2] and
# [1, num_instances, num_keypoints], respectively.
refined_keypoints
,
refined_scores
=
refine_keypoints
(
regressed_keypoints_for_objects
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
boxes_slice
,
(
refined_keypoints
,
refined_scores
,
refined_depths
)
=
refine_keypoints
(
regressed_keypoints_for_objects
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
boxes_slice
,
unmatched_keypoint_score
=
kp_params
.
unmatched_keypoint_score
,
box_scale
=
kp_params
.
box_scale
,
candidate_search_scale
=
kp_params
.
candidate_search_scale
,
candidate_ranking_mode
=
kp_params
.
candidate_ranking_mode
)
candidate_ranking_mode
=
kp_params
.
candidate_ranking_mode
,
keypoint_depth_candidates
=
keypoint_depth_candidates
)
return
refined_keypoints
,
refined_scores
return
refined_keypoints
,
refined_scores
,
refined_depths
def
regularization_losses
(
self
):
return
[]
...
...
research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
View file @
0c85c06c
...
...
@@ -695,7 +695,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_heatmap_offsets
=
tf
.
constant
(
keypoint_heatmap_offsets_np
,
dtype
=
tf
.
float32
)
keypoint_cands
,
keypoint_scores
,
num_keypoint_candidates
=
(
(
keypoint_cands
,
keypoint_scores
,
num_keypoint_candidates
,
_
)
=
(
cnma
.
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap
,
keypoint_heatmap_offsets
,
...
...
@@ -780,7 +780,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_regression
=
tf
.
constant
(
keypoint_regression_np
,
dtype
=
tf
.
float32
)
(
keypoint_cands
,
keypoint_scores
)
=
(
(
keypoint_cands
,
keypoint_scores
,
_
)
=
(
cnma
.
prediction_to_single_instance_keypoints
(
object_heatmap
,
keypoint_heatmap
,
...
...
@@ -839,7 +839,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_heatmap_offsets
=
tf
.
constant
(
keypoint_heatmap_offsets_np
,
dtype
=
tf
.
float32
)
keypoint_cands
,
keypoint_scores
,
num_keypoint_candidates
=
(
(
keypoint_cands
,
keypoint_scores
,
num_keypoint_candidates
,
_
)
=
(
cnma
.
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap
,
keypoint_heatmap_offsets
,
...
...
@@ -880,6 +880,89 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np
.
testing
.
assert_array_equal
(
expected_num_keypoint_candidates
,
num_keypoint_candidates
)
@
parameterized
.
parameters
({
'per_keypoint_depth'
:
True
},
{
'per_keypoint_depth'
:
False
})
def
test_keypoint_candidate_prediction_depth
(
self
,
per_keypoint_depth
):
keypoint_heatmap_np
=
np
.
zeros
((
2
,
3
,
3
,
2
),
dtype
=
np
.
float32
)
keypoint_heatmap_np
[
0
,
0
,
0
,
0
]
=
1.0
keypoint_heatmap_np
[
0
,
2
,
1
,
0
]
=
0.7
keypoint_heatmap_np
[
0
,
1
,
1
,
0
]
=
0.6
keypoint_heatmap_np
[
0
,
0
,
2
,
1
]
=
0.7
keypoint_heatmap_np
[
0
,
1
,
1
,
1
]
=
0.3
# Filtered by low score.
keypoint_heatmap_np
[
0
,
2
,
2
,
1
]
=
0.2
keypoint_heatmap_np
[
1
,
1
,
0
,
0
]
=
0.6
keypoint_heatmap_np
[
1
,
2
,
1
,
0
]
=
0.5
keypoint_heatmap_np
[
1
,
0
,
0
,
0
]
=
0.4
keypoint_heatmap_np
[
1
,
0
,
0
,
1
]
=
1.0
keypoint_heatmap_np
[
1
,
0
,
1
,
1
]
=
0.9
keypoint_heatmap_np
[
1
,
2
,
0
,
1
]
=
0.8
if
per_keypoint_depth
:
keypoint_depths_np
=
np
.
zeros
((
2
,
3
,
3
,
2
),
dtype
=
np
.
float32
)
keypoint_depths_np
[
0
,
0
,
0
,
0
]
=
-
1.5
keypoint_depths_np
[
0
,
2
,
1
,
0
]
=
-
1.0
keypoint_depths_np
[
0
,
0
,
2
,
1
]
=
1.5
else
:
keypoint_depths_np
=
np
.
zeros
((
2
,
3
,
3
,
1
),
dtype
=
np
.
float32
)
keypoint_depths_np
[
0
,
0
,
0
,
0
]
=
-
1.5
keypoint_depths_np
[
0
,
2
,
1
,
0
]
=
-
1.0
keypoint_depths_np
[
0
,
0
,
2
,
0
]
=
1.5
keypoint_heatmap_offsets_np
=
np
.
zeros
((
2
,
3
,
3
,
2
),
dtype
=
np
.
float32
)
keypoint_heatmap_offsets_np
[
0
,
0
,
0
]
=
[
0.5
,
0.25
]
keypoint_heatmap_offsets_np
[
0
,
2
,
1
]
=
[
-
0.25
,
0.5
]
keypoint_heatmap_offsets_np
[
0
,
1
,
1
]
=
[
0.0
,
0.0
]
keypoint_heatmap_offsets_np
[
0
,
0
,
2
]
=
[
1.0
,
0.0
]
keypoint_heatmap_offsets_np
[
0
,
2
,
2
]
=
[
1.0
,
1.0
]
keypoint_heatmap_offsets_np
[
1
,
1
,
0
]
=
[
0.25
,
0.5
]
keypoint_heatmap_offsets_np
[
1
,
2
,
1
]
=
[
0.5
,
0.0
]
keypoint_heatmap_offsets_np
[
1
,
0
,
0
]
=
[
0.0
,
-
0.5
]
keypoint_heatmap_offsets_np
[
1
,
0
,
1
]
=
[
0.5
,
-
0.5
]
keypoint_heatmap_offsets_np
[
1
,
2
,
0
]
=
[
-
1.0
,
-
0.5
]
def
graph_fn
():
keypoint_heatmap
=
tf
.
constant
(
keypoint_heatmap_np
,
dtype
=
tf
.
float32
)
keypoint_heatmap_offsets
=
tf
.
constant
(
keypoint_heatmap_offsets_np
,
dtype
=
tf
.
float32
)
keypoint_depths
=
tf
.
constant
(
keypoint_depths_np
,
dtype
=
tf
.
float32
)
(
keypoint_cands
,
keypoint_scores
,
num_keypoint_candidates
,
keypoint_depths
)
=
(
cnma
.
prediction_tensors_to_keypoint_candidates
(
keypoint_heatmap
,
keypoint_heatmap_offsets
,
keypoint_score_threshold
=
0.5
,
max_pool_kernel_size
=
1
,
max_candidates
=
2
,
keypoint_depths
=
keypoint_depths
))
return
(
keypoint_cands
,
keypoint_scores
,
num_keypoint_candidates
,
keypoint_depths
)
(
_
,
keypoint_scores
,
_
,
keypoint_depths
)
=
self
.
execute
(
graph_fn
,
[])
expected_keypoint_scores
=
[
[
# Example 0.
[
1.0
,
0.7
],
# Keypoint 1.
[
0.7
,
0.3
],
# Keypoint 2.
],
[
# Example 1.
[
0.6
,
1.0
],
# Keypoint 1.
[
0.5
,
0.9
],
# Keypoint 2.
],
]
expected_keypoint_depths
=
[
[
[
-
1.5
,
1.5
],
[
-
1.0
,
0.0
],
],
[
[
0.
,
0.
],
[
0.
,
0.
],
],
]
np
.
testing
.
assert_allclose
(
expected_keypoint_scores
,
keypoint_scores
)
np
.
testing
.
assert_allclose
(
expected_keypoint_depths
,
keypoint_depths
)
def
test_regressed_keypoints_at_object_centers
(
self
):
batch_size
=
2
num_keypoints
=
5
...
...
@@ -985,11 +1068,15 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_scores
=
tf
.
constant
(
keypoint_scores_np
,
dtype
=
tf
.
float32
)
num_keypoint_candidates
=
tf
.
constant
(
num_keypoints_candidates_np
,
dtype
=
tf
.
int32
)
refined_keypoints
,
refined_scores
=
cnma
.
refine_keypoints
(
regressed_keypoints
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
None
,
(
refined_keypoints
,
refined_scores
,
_
)
=
cnma
.
refine_keypoints
(
regressed_keypoints
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
None
,
unmatched_keypoint_score
=
unmatched_keypoint_score
,
box_scale
=
1.2
,
candidate_search_scale
=
0.3
,
box_scale
=
1.2
,
candidate_search_scale
=
0.3
,
candidate_ranking_mode
=
candidate_ranking_mode
)
return
refined_keypoints
,
refined_scores
...
...
@@ -1057,7 +1144,8 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np
.
testing
.
assert_allclose
(
expected_refined_keypoints
,
refined_keypoints
)
np
.
testing
.
assert_allclose
(
expected_refined_scores
,
refined_scores
)
def
test_refine_keypoints_with_bboxes
(
self
):
@
parameterized
.
parameters
({
'predict_depth'
:
True
},
{
'predict_depth'
:
False
})
def
test_refine_keypoints_with_bboxes
(
self
,
predict_depth
):
regressed_keypoints_np
=
np
.
array
(
[
# Example 0.
...
...
@@ -1096,7 +1184,22 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
[
0.7
,
0.4
,
0.0
],
# Candidate 0.
[
0.6
,
0.1
,
0.0
],
# Candidate 1.
]
],
dtype
=
np
.
float32
)
],
dtype
=
np
.
float32
)
keypoint_depths_np
=
np
.
array
(
[
# Example 0.
[
[
-
0.8
,
-
0.9
,
-
1.0
],
# Candidate 0.
[
-
0.6
,
-
0.1
,
-
0.9
],
# Candidate 1.
],
# Example 1.
[
[
-
0.7
,
-
0.4
,
-
0.0
],
# Candidate 0.
[
-
0.6
,
-
0.1
,
-
0.0
],
# Candidate 1.
]
],
dtype
=
np
.
float32
)
num_keypoints_candidates_np
=
np
.
array
(
[
# Example 0.
...
...
@@ -1125,17 +1228,28 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_candidates
=
tf
.
constant
(
keypoint_candidates_np
,
dtype
=
tf
.
float32
)
keypoint_scores
=
tf
.
constant
(
keypoint_scores_np
,
dtype
=
tf
.
float32
)
if
predict_depth
:
keypoint_depths
=
tf
.
constant
(
keypoint_depths_np
,
dtype
=
tf
.
float32
)
else
:
keypoint_depths
=
None
num_keypoint_candidates
=
tf
.
constant
(
num_keypoints_candidates_np
,
dtype
=
tf
.
int32
)
bboxes
=
tf
.
constant
(
bboxes_np
,
dtype
=
tf
.
float32
)
refined_keypoints
,
refined_scores
=
cnma
.
refine_keypoints
(
regressed_keypoints
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
bboxes
,
unmatched_keypoint_score
=
unmatched_keypoint_score
,
box_scale
=
1.0
,
candidate_search_scale
=
0.3
)
return
refined_keypoints
,
refined_scores
refined_keypoints
,
refined_scores
=
self
.
execute
(
graph_fn
,
[])
(
refined_keypoints
,
refined_scores
,
refined_depths
)
=
cnma
.
refine_keypoints
(
regressed_keypoints
,
keypoint_candidates
,
keypoint_scores
,
num_keypoint_candidates
,
bboxes
=
bboxes
,
unmatched_keypoint_score
=
unmatched_keypoint_score
,
box_scale
=
1.0
,
candidate_search_scale
=
0.3
,
keypoint_depth_candidates
=
keypoint_depths
)
if
predict_depth
:
return
refined_keypoints
,
refined_scores
,
refined_depths
else
:
return
refined_keypoints
,
refined_scores
expected_refined_keypoints
=
np
.
array
(
[
...
...
@@ -1166,8 +1280,17 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
],
],
dtype
=
np
.
float32
)
np
.
testing
.
assert_allclose
(
expected_refined_keypoints
,
refined_keypoints
)
np
.
testing
.
assert_allclose
(
expected_refined_scores
,
refined_scores
)
if
predict_depth
:
refined_keypoints
,
refined_scores
,
refined_depths
=
self
.
execute
(
graph_fn
,
[])
expected_refined_depths
=
np
.
array
([[[
-
0.8
,
0.0
,
0.0
],
[
0.0
,
0.0
,
-
1.0
]],
[[
-
0.7
,
-
0.1
,
0.0
],
[
-
0.7
,
-
0.4
,
0.0
]]])
np
.
testing
.
assert_allclose
(
expected_refined_depths
,
refined_depths
)
else
:
refined_keypoints
,
refined_scores
=
self
.
execute
(
graph_fn
,
[])
np
.
testing
.
assert_allclose
(
expected_refined_keypoints
,
refined_keypoints
)
np
.
testing
.
assert_allclose
(
expected_refined_scores
,
refined_scores
)
def
test_pad_to_full_keypoint_dim
(
self
):
batch_size
=
4
...
...
@@ -1296,7 +1419,11 @@ def get_fake_od_params():
scale_loss_weight
=
0.1
)
def
get_fake_kp_params
(
num_candidates_per_keypoint
=
100
):
def
get_fake_kp_params
(
num_candidates_per_keypoint
=
100
,
per_keypoint_offset
=
False
,
predict_depth
=
False
,
per_keypoint_depth
=
False
,
peak_radius
=
0
):
"""Returns the fake keypoint estimation parameter namedtuple."""
return
cnma
.
KeypointEstimationParams
(
task_name
=
_TASK_NAME
,
...
...
@@ -1306,7 +1433,11 @@ def get_fake_kp_params(num_candidates_per_keypoint=100):
classification_loss
=
losses
.
WeightedSigmoidClassificationLoss
(),
localization_loss
=
losses
.
L1LocalizationLoss
(),
keypoint_candidate_score_threshold
=
0.1
,
num_candidates_per_keypoint
=
num_candidates_per_keypoint
)
num_candidates_per_keypoint
=
num_candidates_per_keypoint
,
per_keypoint_offset
=
per_keypoint_offset
,
predict_depth
=
predict_depth
,
per_keypoint_depth
=
per_keypoint_depth
,
offset_peak_radius
=
peak_radius
)
def
get_fake_mask_params
():
...
...
@@ -1353,7 +1484,11 @@ def build_center_net_meta_arch(build_resnet=False,
num_classes
=
_NUM_CLASSES
,
max_box_predictions
=
5
,
apply_non_max_suppression
=
False
,
detection_only
=
False
):
detection_only
=
False
,
per_keypoint_offset
=
False
,
predict_depth
=
False
,
per_keypoint_depth
=
False
,
peak_radius
=
0
):
"""Builds the CenterNet meta architecture."""
if
build_resnet
:
feature_extractor
=
(
...
...
@@ -1407,7 +1542,10 @@ def build_center_net_meta_arch(build_resnet=False,
object_center_params
=
get_fake_center_params
(
max_box_predictions
),
object_detection_params
=
get_fake_od_params
(),
keypoint_params_dict
=
{
_TASK_NAME
:
get_fake_kp_params
(
num_candidates_per_keypoint
)
_TASK_NAME
:
get_fake_kp_params
(
num_candidates_per_keypoint
,
per_keypoint_offset
,
predict_depth
,
per_keypoint_depth
,
peak_radius
)
},
non_max_suppression_fn
=
non_max_suppression_fn
)
else
:
...
...
@@ -1992,6 +2130,84 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self
.
assertAllEqual
([
1
,
1
,
num_keypoints
],
detections
[
'detection_keypoint_scores'
].
shape
)
@
parameterized
.
parameters
(
{
'per_keypoint_depth'
:
False
},
{
'per_keypoint_depth'
:
True
},
)
def
test_postprocess_single_class_depth
(
self
,
per_keypoint_depth
):
"""Test the postprocess function."""
model
=
build_center_net_meta_arch
(
num_classes
=
1
,
per_keypoint_offset
=
per_keypoint_depth
,
predict_depth
=
True
,
per_keypoint_depth
=
per_keypoint_depth
)
num_keypoints
=
len
(
model
.
_kp_params_dict
[
_TASK_NAME
].
keypoint_indices
)
class_center
=
np
.
zeros
((
1
,
32
,
32
,
1
),
dtype
=
np
.
float32
)
height_width
=
np
.
zeros
((
1
,
32
,
32
,
2
),
dtype
=
np
.
float32
)
offset
=
np
.
zeros
((
1
,
32
,
32
,
2
),
dtype
=
np
.
float32
)
keypoint_heatmaps
=
np
.
zeros
((
1
,
32
,
32
,
num_keypoints
),
dtype
=
np
.
float32
)
keypoint_offsets
=
np
.
zeros
((
1
,
32
,
32
,
2
),
dtype
=
np
.
float32
)
keypoint_regression
=
np
.
random
.
randn
(
1
,
32
,
32
,
num_keypoints
*
2
)
class_probs
=
np
.
zeros
(
1
)
class_probs
[
0
]
=
_logit
(
0.75
)
class_center
[
0
,
16
,
16
]
=
class_probs
height_width
[
0
,
16
,
16
]
=
[
5
,
10
]
offset
[
0
,
16
,
16
]
=
[.
25
,
.
5
]
keypoint_regression
[
0
,
16
,
16
]
=
[
-
1.
,
-
1.
,
-
1.
,
1.
,
1.
,
-
1.
,
1.
,
1.
]
keypoint_heatmaps
[
0
,
14
,
14
,
0
]
=
_logit
(
0.9
)
keypoint_heatmaps
[
0
,
14
,
18
,
1
]
=
_logit
(
0.9
)
keypoint_heatmaps
[
0
,
18
,
14
,
2
]
=
_logit
(
0.9
)
keypoint_heatmaps
[
0
,
18
,
18
,
3
]
=
_logit
(
0.05
)
# Note the low score.
if
per_keypoint_depth
:
keypoint_depth
=
np
.
zeros
((
1
,
32
,
32
,
num_keypoints
),
dtype
=
np
.
float32
)
keypoint_depth
[
0
,
14
,
14
,
0
]
=
-
1.0
keypoint_depth
[
0
,
14
,
18
,
1
]
=
-
1.1
keypoint_depth
[
0
,
18
,
14
,
2
]
=
-
1.2
keypoint_depth
[
0
,
18
,
18
,
3
]
=
-
1.3
else
:
keypoint_depth
=
np
.
zeros
((
1
,
32
,
32
,
1
),
dtype
=
np
.
float32
)
keypoint_depth
[
0
,
14
,
14
,
0
]
=
-
1.0
keypoint_depth
[
0
,
14
,
18
,
0
]
=
-
1.1
keypoint_depth
[
0
,
18
,
14
,
0
]
=
-
1.2
keypoint_depth
[
0
,
18
,
18
,
0
]
=
-
1.3
class_center
=
tf
.
constant
(
class_center
)
height_width
=
tf
.
constant
(
height_width
)
offset
=
tf
.
constant
(
offset
)
keypoint_heatmaps
=
tf
.
constant
(
keypoint_heatmaps
,
dtype
=
tf
.
float32
)
keypoint_offsets
=
tf
.
constant
(
keypoint_offsets
,
dtype
=
tf
.
float32
)
keypoint_regression
=
tf
.
constant
(
keypoint_regression
,
dtype
=
tf
.
float32
)
keypoint_depth
=
tf
.
constant
(
keypoint_depth
,
dtype
=
tf
.
float32
)
prediction_dict
=
{
cnma
.
OBJECT_CENTER
:
[
class_center
],
cnma
.
BOX_SCALE
:
[
height_width
],
cnma
.
BOX_OFFSET
:
[
offset
],
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_HEATMAP
):
[
keypoint_heatmaps
],
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_OFFSET
):
[
keypoint_offsets
],
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_REGRESSION
):
[
keypoint_regression
],
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_DEPTH
):
[
keypoint_depth
]
}
def
graph_fn
():
detections
=
model
.
postprocess
(
prediction_dict
,
tf
.
constant
([[
128
,
128
,
3
]]))
return
detections
detections
=
self
.
execute_cpu
(
graph_fn
,
[])
self
.
assertAllClose
(
detections
[
'detection_keypoint_depths'
][
0
,
0
],
np
.
array
([
-
1.0
,
-
1.1
,
-
1.2
,
0.0
]))
self
.
assertAllClose
(
detections
[
'detection_keypoint_scores'
][
0
,
0
],
np
.
array
([
0.9
,
0.9
,
0.9
,
0.1
]))
def
test_get_instance_indices
(
self
):
classes
=
tf
.
constant
([[
0
,
1
,
2
,
0
],
[
2
,
1
,
2
,
2
]],
dtype
=
tf
.
int32
)
num_detections
=
tf
.
constant
([
1
,
3
],
dtype
=
tf
.
int32
)
...
...
@@ -2003,7 +2219,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self
.
assertAllEqual
(
valid_indices
.
numpy
(),
[
0
,
2
])
def
get_fake_prediction_dict
(
input_height
,
input_width
,
stride
):
def
get_fake_prediction_dict
(
input_height
,
input_width
,
stride
,
per_keypoint_depth
=
False
):
"""Prepares the fake prediction dictionary."""
output_height
=
input_height
//
stride
output_width
=
input_width
//
stride
...
...
@@ -2038,6 +2257,11 @@ def get_fake_prediction_dict(input_height, input_width, stride):
dtype
=
np
.
float32
)
keypoint_offset
[
0
,
2
,
4
]
=
0.2
,
0.4
keypoint_depth
=
np
.
zeros
((
2
,
output_height
,
output_width
,
_NUM_KEYPOINTS
if
per_keypoint_depth
else
1
),
dtype
=
np
.
float32
)
keypoint_depth
[
0
,
2
,
4
]
=
3.0
keypoint_regression
=
np
.
zeros
(
(
2
,
output_height
,
output_width
,
2
*
_NUM_KEYPOINTS
),
dtype
=
np
.
float32
)
keypoint_regression
[
0
,
2
,
4
]
=
0.0
,
0.0
,
0.2
,
0.4
,
0.0
,
0.0
,
0.2
,
0.4
...
...
@@ -2073,14 +2297,10 @@ def get_fake_prediction_dict(input_height, input_width, stride):
tf
.
constant
(
object_center
),
tf
.
constant
(
object_center
)
],
cnma
.
BOX_SCALE
:
[
tf
.
constant
(
object_scale
),
tf
.
constant
(
object_scale
)
],
cnma
.
BOX_OFFSET
:
[
tf
.
constant
(
object_offset
),
tf
.
constant
(
object_offset
)
],
cnma
.
BOX_SCALE
:
[
tf
.
constant
(
object_scale
),
tf
.
constant
(
object_scale
)],
cnma
.
BOX_OFFSET
:
[
tf
.
constant
(
object_offset
),
tf
.
constant
(
object_offset
)],
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_HEATMAP
):
[
tf
.
constant
(
keypoint_heatmap
),
tf
.
constant
(
keypoint_heatmap
)
...
...
@@ -2093,6 +2313,10 @@ def get_fake_prediction_dict(input_height, input_width, stride):
tf
.
constant
(
keypoint_regression
),
tf
.
constant
(
keypoint_regression
)
],
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_DEPTH
):
[
tf
.
constant
(
keypoint_depth
),
tf
.
constant
(
keypoint_depth
)
],
cnma
.
SEGMENTATION_HEATMAP
:
[
tf
.
constant
(
mask_heatmap
),
tf
.
constant
(
mask_heatmap
)
...
...
@@ -2117,7 +2341,10 @@ def get_fake_prediction_dict(input_height, input_width, stride):
return
prediction_dict
def
get_fake_groundtruth_dict
(
input_height
,
input_width
,
stride
):
def
get_fake_groundtruth_dict
(
input_height
,
input_width
,
stride
,
has_depth
=
False
):
"""Prepares the fake groundtruth dictionary."""
# A small box with center at (0.55, 0.55).
boxes
=
[
...
...
@@ -2146,6 +2373,26 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
axis
=
2
),
multiples
=
[
1
,
1
,
2
]),
]
if
has_depth
:
keypoint_depths
=
[
tf
.
constant
([[
float
(
'nan'
),
3.0
,
float
(
'nan'
),
3.0
,
0.55
,
0.0
]]),
tf
.
constant
([[
float
(
'nan'
),
0.55
,
float
(
'nan'
),
0.55
,
0.55
,
0.0
]])
]
keypoint_depth_weights
=
[
tf
.
constant
([[
1.0
,
1.0
,
1.0
,
1.0
,
0.0
,
0.0
]]),
tf
.
constant
([[
1.0
,
1.0
,
1.0
,
1.0
,
0.0
,
0.0
]])
]
else
:
keypoint_depths
=
[
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
]]),
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
]])
]
keypoint_depth_weights
=
[
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
]]),
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
]])
]
labeled_classes
=
[
tf
.
one_hot
([
1
],
depth
=
_NUM_CLASSES
)
+
tf
.
one_hot
([
2
],
depth
=
_NUM_CLASSES
),
tf
.
one_hot
([
0
],
depth
=
_NUM_CLASSES
)
+
tf
.
one_hot
([
1
],
depth
=
_NUM_CLASSES
),
...
...
@@ -2187,11 +2434,12 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
fields
.
BoxListFields
.
weights
:
weights
,
fields
.
BoxListFields
.
classes
:
classes
,
fields
.
BoxListFields
.
keypoints
:
keypoints
,
fields
.
BoxListFields
.
keypoint_depths
:
keypoint_depths
,
fields
.
BoxListFields
.
keypoint_depth_weights
:
keypoint_depth_weights
,
fields
.
BoxListFields
.
masks
:
masks
,
fields
.
BoxListFields
.
densepose_num_points
:
densepose_num_points
,
fields
.
BoxListFields
.
densepose_part_ids
:
densepose_part_ids
,
fields
.
BoxListFields
.
densepose_surface_coords
:
densepose_surface_coords
,
fields
.
BoxListFields
.
densepose_surface_coords
:
densepose_surface_coords
,
fields
.
BoxListFields
.
track_ids
:
track_ids
,
fields
.
BoxListFields
.
temporal_offsets
:
temporal_offsets
,
fields
.
BoxListFields
.
track_match_flags
:
track_match_flags
,
...
...
@@ -2201,7 +2449,7 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
@
unittest
.
skipIf
(
tf_version
.
is_tf1
(),
'Skipping TF2.X only test.'
)
class
CenterNetMetaComputeLossTest
(
test_case
.
TestCase
):
class
CenterNetMetaComputeLossTest
(
test_case
.
TestCase
,
parameterized
.
TestCase
):
"""Test for CenterNet loss compuation related functions."""
def
setUp
(
self
):
...
...
@@ -2328,6 +2576,45 @@ class CenterNetMetaComputeLossTest(test_case.TestCase):
# The prediction and groundtruth are curated to produce very low loss.
self
.
assertGreater
(
0.01
,
loss
)
@
parameterized
.
parameters
(
{
'per_keypoint_depth'
:
False
},
{
'per_keypoint_depth'
:
True
},
)
def
test_compute_kp_depth_loss
(
self
,
per_keypoint_depth
):
prediction_dict
=
get_fake_prediction_dict
(
self
.
input_height
,
self
.
input_width
,
self
.
stride
,
per_keypoint_depth
=
per_keypoint_depth
)
model
=
build_center_net_meta_arch
(
num_classes
=
1
,
per_keypoint_offset
=
per_keypoint_depth
,
predict_depth
=
True
,
per_keypoint_depth
=
per_keypoint_depth
,
peak_radius
=
1
if
per_keypoint_depth
else
0
)
model
.
_groundtruth_lists
=
get_fake_groundtruth_dict
(
self
.
input_height
,
self
.
input_width
,
self
.
stride
,
has_depth
=
True
)
def
graph_fn
():
loss
=
model
.
_compute_kp_depth_loss
(
input_height
=
self
.
input_height
,
input_width
=
self
.
input_width
,
task_name
=
_TASK_NAME
,
depth_predictions
=
prediction_dict
[
cnma
.
get_keypoint_name
(
_TASK_NAME
,
cnma
.
KEYPOINT_DEPTH
)],
localization_loss_fn
=
self
.
localization_loss_fn
)
return
loss
loss
=
self
.
execute
(
graph_fn
,
[])
if
per_keypoint_depth
:
# The loss is computed on a disk with radius 1 but only the center pixel
# has the accurate prediction. The final loss is (4 * |3-0|) / 5 = 2.4
self
.
assertAlmostEqual
(
2.4
,
loss
,
delta
=
1e-4
)
else
:
# The prediction and groundtruth are curated to produce very low loss.
self
.
assertGreater
(
0.01
,
loss
)
def
test_compute_track_embedding_loss
(
self
):
default_fc
=
self
.
model
.
track_reid_classification_net
# Initialize the kernel to extreme values so that the classification score
...
...
research/object_detection/protos/center_net.proto
View file @
0c85c06c
...
...
@@ -165,6 +165,21 @@ message CenterNet {
// out_height, out_width, 2 * num_keypoints] (recommended when the
// offset_peak_radius is not zero).
optional
bool
per_keypoint_offset
=
18
[
default
=
false
];
// Indicates whether to predict the depth of each keypoints. Note that this
// is only supported in the single class keypoint task.
optional
bool
predict_depth
=
19
[
default
=
false
];
// Indicates whether to predict depths for each keypoint channel
// separately. If set False, the output depth target has the shape
// [batch_size, out_height, out_width, 1]. If set True, the output depth
// target has the shape [batch_size, out_height, out_width,
// num_keypoints]. Recommend to set this value and "per_keypoint_offset" to
// both be True at the same time.
optional
bool
per_keypoint_depth
=
20
[
default
=
false
];
// The weight of the keypoint depth loss.
optional
float
keypoint_depth_loss_weight
=
21
[
default
=
1.0
];
}
repeated
KeypointEstimation
keypoint_estimation_task
=
7
;
...
...
@@ -278,7 +293,6 @@ message CenterNet {
// from CenterNet. Use this optional parameter to apply traditional non max
// suppression and score thresholding.
optional
PostProcessing
post_processing
=
24
;
}
message
CenterNetFeatureExtractor
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment