"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "664d7a24625dcd3ebeda58d87faa4da030130f89"
Commit e39f2477 authored by Yu-hui Chen, committed by TF Object Detection Team

Updated post-processing logic for MoveNet, including:

1) Added the score-to-scaled-distance ranking method.
2) Added the Gaussian-weighted ranking method.
3) Added the hyperparameter used in the rescoring function.

PiperOrigin-RevId: 379646603
parent 469339ec
@@ -913,7 +913,10 @@ def keypoint_proto_to_params(kp_config, keypoint_map_dict):
offset_head_num_filters=offset_head_num_filters,
offset_head_kernel_sizes=offset_head_kernel_sizes,
regress_head_num_filters=regress_head_num_filters,
regress_head_kernel_sizes=regress_head_kernel_sizes,
score_distance_multiplier=kp_config.score_distance_multiplier,
std_dev_multiplier=kp_config.std_dev_multiplier,
rescoring_threshold=kp_config.rescoring_threshold)
def object_detection_proto_to_params(od_config):
......
@@ -123,6 +123,9 @@ class ModelBuilderTF2Test(
predict_depth: true
per_keypoint_depth: true
keypoint_depth_loss_weight: 0.3
score_distance_multiplier: 11.0
std_dev_multiplier: 2.8
rescoring_threshold: 0.5
""" """
if customize_head_params: if customize_head_params:
task_proto_txt += """ task_proto_txt += """
...@@ -358,6 +361,9 @@ class ModelBuilderTF2Test( ...@@ -358,6 +361,9 @@ class ModelBuilderTF2Test(
self.assertEqual(kp_params.predict_depth, True) self.assertEqual(kp_params.predict_depth, True)
self.assertEqual(kp_params.per_keypoint_depth, True) self.assertEqual(kp_params.per_keypoint_depth, True)
self.assertAlmostEqual(kp_params.keypoint_depth_loss_weight, 0.3) self.assertAlmostEqual(kp_params.keypoint_depth_loss_weight, 0.3)
self.assertAlmostEqual(kp_params.score_distance_multiplier, 11.0)
self.assertAlmostEqual(kp_params.std_dev_multiplier, 2.8)
self.assertAlmostEqual(kp_params.rescoring_threshold, 0.5)
if customize_head_params:
# Set by the config.
self.assertEqual(kp_params.heatmap_head_num_filters, [64, 32])
......
@@ -825,6 +825,111 @@ def regressed_keypoints_at_object_centers(regressed_keypoint_predictions,
[batch_size, num_instances, -1])
def sdr_scaled_ranking_score(
keypoint_scores, distances, bboxes, score_distance_multiplier):
"""Score-to-distance-ratio method to rank keypoint candidates.
This corresponds to the ranking method: 'score_scaled_distance_ratio'. The
keypoint candidates are ranked using the formula:
ranking_score = score / (distance + offset)
where 'score' is the keypoint heatmap score, 'distance' is the distance
between the heatmap peak location and the regressed joint location,
'offset' is a function of the predicted bounding box:
offset = max(bbox height, bbox width) * score_distance_multiplier
The ranking score is used to find the best keypoint candidate for snapping
regressed joints.
Args:
keypoint_scores: A float tensor of shape
[batch_size, max_candidates, num_keypoints] indicating the scores for
keypoint candidates.
distances: A float tensor of shape
[batch_size, num_instances, max_candidates, num_keypoints] indicating the
distances between the keypoint candidates and the joint regression
locations of each instance.
bboxes: A tensor of shape [batch_size, num_instances, 4] with predicted
bounding boxes for each instance, expressed in the output coordinate
frame. If not provided, boxes will be computed from regressed keypoints.
score_distance_multiplier: A scalar used to multiply the bounding box size
to be the offset in the score-to-distance-ratio formula.
Returns:
A float tensor of shape [batch_size, num_instances, max_candidates,
num_keypoints] representing the ranking score of each keypoint candidate.
"""
# Get ymin, xmin, ymax, xmax bounding box coordinates.
# Shape: [batch_size, num_instances]
ymin, xmin, ymax, xmax = tf.unstack(bboxes, axis=2)
# Shape: [batch_size, num_instances].
offsets = tf.math.maximum(
ymax - ymin, xmax - xmin) * score_distance_multiplier
# Shape: [batch_size, num_instances, max_candidates, num_keypoints]
ranking_scores = keypoint_scores[:, tf.newaxis, :, :] / (
distances + offsets[:, :, tf.newaxis, tf.newaxis])
return ranking_scores
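A quick sanity check of the score-to-scaled-distance-ratio formula above, with made-up numbers (not taken from this change):

# Toy example (assumed values): score 0.9, 10x10 box, multiplier 0.1.
score, box_h, box_w, multiplier = 0.9, 10.0, 10.0, 0.1
offset = max(box_h, box_w) * multiplier            # 1.0
print(score / (0.0 + offset), score / (2.0 + offset))  # 0.9 vs. 0.3

Closer candidates rank higher, and a larger box (hence a larger offset) makes the ranking less sensitive to distance, which is exactly what the test below asserts.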
def gaussian_weighted_score(
keypoint_scores, distances, keypoint_std_dev, bboxes):
"""Gaussian weighted method to rank keypoint candidates.
This corresponds to the ranking method: 'gaussian_weighted'. The
keypoint candidates are ranked using the formula:
score * exp((-distances^2) / (2 * sigma^2))
where 'score' is the keypoint heatmap score, 'distances' is the distance
between the heatmap peak location and the regressed joint location, and
'sigma' is the Gaussian standard deviation used to generate the Gaussian
heatmap target, multiplied by 'std_dev_multiplier'.
The ranking score is used to find the best keypoint candidate for snapping
regressed joints.
Args:
keypoint_scores: A float tensor of shape
[batch_size, max_candidates, num_keypoints] indicating the scores for
keypoint candidates.
distances: A float tensor of shape
[batch_size, num_instances, max_candidates, num_keypoints] indicating the
distances between the keypoint candidates and the joint regression
locations of each instance.
keypoint_std_dev: A list of floats representing the standard deviation of
the Gaussian kernel used to generate the keypoint heatmap. It provides
the flexibility of using a different Gaussian kernel size for each
keypoint class.
bboxes: A tensor of shape [batch_size, num_instances, 4] with predicted
bounding boxes for each instance, expressed in the output coordinate
frame. If not provided, boxes will be computed from regressed keypoints.
Returns:
A float tensor of shape [batch_size, num_instances, max_candidates,
num_keypoints] representing the ranking score of each keypoint candidate.
"""
# Get ymin, xmin, ymax, xmax bounding box coordinates.
# Shape: [batch_size, num_instances]
ymin, xmin, ymax, xmax = tf.unstack(bboxes, axis=2)
# shape: [num_keypoints]
keypoint_std_dev = tf.constant(keypoint_std_dev)
# shape: [batch_size, num_instances]
sigma = cn_assigner._compute_std_dev_from_box_size( # pylint: disable=protected-access
ymax - ymin, xmax - xmin, min_overlap=0.7)
# shape: [batch_size, num_instances, num_keypoints]
sigma = keypoint_std_dev[tf.newaxis, tf.newaxis, :] * sigma[:, :, tf.newaxis]
(_, _, max_candidates, _) = _get_shape(distances, 4)
# shape: [batch_size, num_instances, max_candidates, num_keypoints]
sigma = tf.tile(
sigma[:, :, tf.newaxis, :], multiples=[1, 1, max_candidates, 1])
gaussian_map = tf.exp((-1 * distances * distances) / (2 * sigma * sigma))
return keypoint_scores[:, tf.newaxis, :, :] * gaussian_map
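A similar sanity check of the Gaussian weighting, again with assumed toy values (not derived from a real box size):

# Toy example: score 0.9, sigma 2.0; the weight decays with distance.
import math
score, sigma = 0.9, 2.0
for distance in (0.0, 1.0, 2.0):
    print(score * math.exp(-(distance ** 2) / (2.0 * sigma ** 2)))
# 0.9, ~0.79, ~0.55: a zero-distance candidate keeps its heatmap score,
# and the penalty grows with distance relative to sigma.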
def refine_keypoints(regressed_keypoints,
keypoint_candidates,
keypoint_scores,
@@ -836,7 +941,9 @@ def refine_keypoints(regressed_keypoints,
candidate_ranking_mode='min_distance',
score_distance_offset=1e-6,
keypoint_depth_candidates=None,
keypoint_score_threshold=0.1,
score_distance_multiplier=0.1,
keypoint_std_dev=None):
"""Refines regressed keypoints by snapping to the nearest candidate keypoints.
The initial regressed keypoints represent a full set of keypoints regressed
@@ -890,7 +997,8 @@ def refine_keypoints(regressed_keypoints,
largest dimension of a bounding box. The resulting distance becomes a
search radius for candidates in the vicinity of each regressed keypoint.
candidate_ranking_mode: A string as one of ['min_distance',
'score_distance_ratio', 'score_scaled_distance_ratio',
'gaussian_weighted'] indicating how to select the candidate. If an
invalid value is provided, a ValueError will be raised.
score_distance_offset: The distance offset to apply in the denominator when
candidate_ranking_mode is 'score_distance_ratio'. The metric to maximize
@@ -902,6 +1010,13 @@ def refine_keypoints(regressed_keypoints,
keypoint candidates.
keypoint_score_threshold: float, The heatmap score threshold for
a keypoint to become a valid candidate.
score_distance_multiplier: A scalar used to multiply the bounding box size
to be used as the offset in the score-to-distance-ratio formula.
keypoint_std_dev: A list of floats representing the standard deviation of
the Gaussian kernel used to rank the keypoint candidates. It offers the
flexibility of using a different Gaussian kernel size for each keypoint
class. Only applicable when candidate_ranking_mode is 'gaussian_weighted'.
Returns:
A tuple with:
@@ -974,6 +1089,15 @@ def refine_keypoints(regressed_keypoints,
multiples=[1, num_instances, 1, 1])
ranking_scores = tiled_keypoint_scores / (distances + score_distance_offset)
nearby_candidate_inds = tf.math.argmax(ranking_scores, axis=2)
elif candidate_ranking_mode == 'score_scaled_distance_ratio':
ranking_scores = sdr_scaled_ranking_score(
keypoint_scores, distances, bboxes, score_distance_multiplier)
nearby_candidate_inds = tf.math.argmax(ranking_scores, axis=2)
elif candidate_ranking_mode == 'gaussian_weighted':
ranking_scores = gaussian_weighted_score(
keypoint_scores, distances, keypoint_std_dev, bboxes)
nearby_candidate_inds = tf.math.argmax(ranking_scores, axis=2)
weighted_scores = tf.math.reduce_max(ranking_scores, axis=2)
else:
raise ValueError('Not recognized candidate_ranking_mode: %s' %
candidate_ranking_mode)
@@ -987,6 +1111,11 @@ def refine_keypoints(regressed_keypoints,
nearby_candidate_inds,
keypoint_depth_candidates))
# If the ranking mode is 'gaussian_weighted', we use the ranking scores as the
# final keypoint confidence since their values are in [0, 1].
if candidate_ranking_mode == 'gaussian_weighted':
nearby_candidate_scores = weighted_scores
if bboxes is None:
# Filter out the chosen candidate with score lower than unmatched
# keypoint score.
@@ -1737,7 +1866,8 @@ class KeypointEstimationParams(
'rescore_instances', 'heatmap_head_num_filters',
'heatmap_head_kernel_sizes', 'offset_head_num_filters',
'offset_head_kernel_sizes', 'regress_head_num_filters',
'regress_head_kernel_sizes', 'score_distance_multiplier',
'std_dev_multiplier', 'rescoring_threshold'
])):
"""Namedtuple to host object detection related parameters.
@@ -1782,7 +1912,10 @@ class KeypointEstimationParams(
offset_head_num_filters=(256),
offset_head_kernel_sizes=(3),
regress_head_num_filters=(256),
regress_head_kernel_sizes=(3),
score_distance_multiplier=0.1,
std_dev_multiplier=1.0,
rescoring_threshold=0.0):
"""Constructor with default values for KeypointEstimationParams. """Constructor with default values for KeypointEstimationParams.
Args: Args:
...@@ -1834,8 +1967,9 @@ class KeypointEstimationParams( ...@@ -1834,8 +1967,9 @@ class KeypointEstimationParams(
candidate_search_scale: The scale parameter that multiplies the largest candidate_search_scale: The scale parameter that multiplies the largest
dimension of a bounding box. The resulting distance becomes a search dimension of a bounding box. The resulting distance becomes a search
radius for candidates in the vicinity of each regressed keypoint. radius for candidates in the vicinity of each regressed keypoint.
candidate_ranking_mode: One of ['min_distance', 'score_distance_ratio'] candidate_ranking_mode: One of ['min_distance', 'score_distance_ratio',
indicating how to select the keypoint candidate. 'score_scaled_distance_ratio', 'gaussian_weighted'] indicating how to
select the keypoint candidate.
offset_peak_radius: The radius (in the unit of output pixel) around
groundtruth heatmap peak to assign the offset targets. If set 0, then
the offset target will only be assigned to the heatmap peak (same
@@ -1874,6 +2008,14 @@ class KeypointEstimationParams(
by the keypoint regression prediction head.
regress_head_kernel_sizes: kernel size of the convolutional layers used
by the keypoint regression prediction head.
score_distance_multiplier: A scalar used to multiply the bounding box size
to be used as the offset in the score-to-distance-ratio formula.
std_dev_multiplier: A scalar used to multiply the standard deviation to
control the Gaussian kernel which is used to weight the candidates.
rescoring_threshold: A scalar used when "rescore_instances" is set to
True. The detection score of an instance is set to be the average of
the scores of those keypoints whose scores are higher than the
threshold.
Returns:
An initialized KeypointEstimationParams namedtuple.
@@ -1891,7 +2033,8 @@ class KeypointEstimationParams(
clip_out_of_frame_keypoints, rescore_instances,
heatmap_head_num_filters, heatmap_head_kernel_sizes,
offset_head_num_filters, offset_head_kernel_sizes,
regress_head_num_filters, regress_head_kernel_sizes,
score_distance_multiplier, std_dev_multiplier, rescoring_threshold)
class ObjectCenterParams(
@@ -3295,7 +3438,10 @@ class CenterNetMetaArch(model.DetectionModel):
kpt_mask_tiled == 1.0)
class_and_keypoint_mask_float = tf.cast(class_and_keypoint_mask,
dtype=tf.float32)
visible_keypoints = tf.math.greater(
keypoint_scores, kp_params.rescoring_threshold)
keypoint_scores = tf.where(
visible_keypoints, keypoint_scores, tf.zeros_like(keypoint_scores))
num_visible_keypoints = tf.reduce_sum(
class_and_keypoint_mask_float *
tf.cast(visible_keypoints, tf.float32), axis=-1)
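Illustration of the thresholded rescoring this change enables (scores and threshold below are assumed, not from the commit):

# Only keypoints scoring above rescoring_threshold contribute to the
# instance score; the rest are zeroed and excluded from the average.
keypoint_scores = [0.9, 0.6, 0.3, 0.0]
threshold = 0.5
visible = [s for s in keypoint_scores if s > threshold]   # [0.9, 0.6]
instance_score = sum(visible) / len(visible)               # 0.75
print(instance_score)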
@@ -3903,9 +4049,16 @@ class CenterNetMetaArch(model.DetectionModel):
# instances and keypoints for class i, respectively.
(kpt_coords_for_class, kpt_scores_for_class, _) = (
self._postprocess_keypoints_for_class_and_image(
keypoint_heatmap,
keypoint_offsets,
keypoint_regression,
classes,
y_indices_for_kpt_class,
x_indices_for_kpt_class,
boxes_for_kpt_class,
ex_ind,
kp_params,
))
# Expand keypoint dimension (with padding) so that coordinates and
# scores have shape [1, num_instances_i, num_total_keypoints, 2] and
@@ -4158,6 +4311,9 @@ class CenterNetMetaArch(model.DetectionModel):
max_candidates=kp_params.num_candidates_per_keypoint,
keypoint_depths=keypoint_depths))
kpts_std_dev_postprocess = [
s * kp_params.std_dev_multiplier for s in kp_params.keypoint_std_dev
]
# Get the refined keypoints and scores, of shape
# [1, num_instances, num_keypoints, 2] and
# [1, num_instances, num_keypoints], respectively.
@@ -4173,8 +4329,9 @@ class CenterNetMetaArch(model.DetectionModel):
candidate_ranking_mode=kp_params.candidate_ranking_mode,
score_distance_offset=kp_params.score_distance_offset,
keypoint_depth_candidates=keypoint_depth_candidates,
keypoint_score_threshold=(kp_params.keypoint_candidate_score_threshold),
score_distance_multiplier=kp_params.score_distance_multiplier,
keypoint_std_dev=kpts_std_dev_postprocess)
return refined_keypoints, refined_scores, refined_depths
......
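The std_dev_multiplier added above simply scales the configured per-keypoint standard deviations before they are handed to refine_keypoints; a minimal sketch with assumed per-keypoint values:

# Assumed per-keypoint sigmas from the config; only the multiplication below
# mirrors the kpts_std_dev_postprocess computation in the diff.
keypoint_std_dev = [0.025, 0.035, 0.025]
std_dev_multiplier = 2.8
kpts_std_dev_postprocess = [s * std_dev_multiplier for s in keypoint_std_dev]
print(kpts_std_dev_postprocess)  # roughly [0.07, 0.098, 0.07], used as
# keypoint_std_dev when candidate_ranking_mode is 'gaussian_weighted'.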
@@ -1380,6 +1380,136 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints)
np.testing.assert_allclose(expected_refined_scores, refined_scores)
def test_sdr_scaled_ranking_score(self):
keypoint_scores_np = np.array(
[
# Example 0.
[
[0.9, 0.9, 0.9], # Candidate 0.
[0.9, 0.9, 0.9], # Candidate 1.
]
],
dtype=np.float32)
distances_np = np.expand_dims(
np.array(
[
# Instance 0.
[
[2.0, 1.0, 0.0], # Candidate 0.
[2.0, 1.0, 2.0], # Candidate 1.
],
# Instance 1.
[
[2.0, 1.0, 0.0], # Candidate 0.
[2.0, 1.0, 2.0], # Candidate 1.
]
],
dtype=np.float32),
axis=0)
bboxes_np = np.array(
[
# Example 0.
[
[2.0, 2.0, 20.0, 20.0], # Instance 0 large box.
[3.0, 3.0, 4.0, 4.0], # Instance 1 small box.
],
],
dtype=np.float32)
# def graph_fn():
keypoint_scores = tf.constant(
keypoint_scores_np, dtype=tf.float32)
distances = tf.constant(
distances_np, dtype=tf.float32)
bboxes = tf.constant(bboxes_np, dtype=tf.float32)
ranking_scores = cnma.sdr_scaled_ranking_score(
keypoint_scores=keypoint_scores,
distances=distances,
bboxes=bboxes,
score_distance_multiplier=0.1)
self.assertAllEqual([1, 2, 2, 3], ranking_scores.shape)
# When the scores are the same, larger distance results in lower ranking
# score.
# instance 0, candidate 0, keypoint type 0 vs. 1 vs. 2
self.assertGreater(ranking_scores[0, 0, 0, 2], ranking_scores[0, 0, 0, 1])
self.assertGreater(ranking_scores[0, 0, 0, 1], ranking_scores[0, 0, 0, 0])
# When the scores are the same and the difference of distances is the same,
# the instance with the larger bbox has a smaller ranking score difference,
# i.e. it is less sensitive to the distance change.
# instance 0 vs. 1, candidate 0, keypoint type 0 and 1
self.assertGreater(
ranking_scores[0, 1, 1, 1] - ranking_scores[0, 1, 1, 0],
ranking_scores[0, 0, 1, 1] - ranking_scores[0, 0, 1, 0]
)
def test_gaussian_weighted_score(self):
keypoint_scores_np = np.array(
[
# Example 0.
[
[0.9, 0.9, 0.9], # Candidate 0.
[1.0, 0.8, 1.0], # Candidate 1.
]
],
dtype=np.float32)
distances_np = np.expand_dims(
np.array(
[
# Instance 0.
[
[2.0, 1.0, 0.0], # Candidate 0.
[1.0, 0.0, 2.0], # Candidate 1.
],
# Instance 1.
[
[2.0, 1.0, 0.0], # Candidate 0.
[1.0, 0.0, 2.0], # Candidate 1.
]
],
dtype=np.float32),
axis=0)
bboxes_np = np.array(
[
# Example 0.
[
[2.0, 2.0, 20.0, 20.0], # Instance 0 large box.
[3.0, 3.0, 4.0, 4.0], # Instance 1 small box.
],
],
dtype=np.float32)
# def graph_fn():
keypoint_scores = tf.constant(
keypoint_scores_np, dtype=tf.float32)
distances = tf.constant(
distances_np, dtype=tf.float32)
bboxes = tf.constant(bboxes_np, dtype=tf.float32)
ranking_scores = cnma.gaussian_weighted_score(
keypoint_scores=keypoint_scores,
distances=distances,
keypoint_std_dev=[1.0, 0.5, 1.5],
bboxes=bboxes)
self.assertAllEqual([1, 2, 2, 3], ranking_scores.shape)
# When distance is zero, the candidate's score remains the same.
# instance 0, candidate 0, keypoint type 2
self.assertAlmostEqual(ranking_scores[0, 0, 0, 2], keypoint_scores[0, 0, 2])
# instance 0, candidate 1, keypoint type 1
self.assertAlmostEqual(ranking_scores[0, 0, 1, 1], keypoint_scores[0, 1, 1])
# When the distances of two candidates are 1:2 and the keypoint standard
# deviation is 1:2 and the keypoint heatmap scores are the same, the
# resulting ranking score should be the same.
# instance 0, candidate 0, keypoint type 0, 1.
self.assertAlmostEqual(
ranking_scores[0, 0, 0, 0], ranking_scores[0, 0, 0, 1])
# When the distances/heatmap scores/keypoint standard deviations are the
# same, the instance with the larger bbox size gets a higher score.
self.assertGreater(ranking_scores[0, 0, 0, 0], ranking_scores[0, 1, 0, 0])
def test_pad_to_full_keypoint_dim(self):
batch_size = 4
num_instances = 8
......
@@ -187,8 +187,9 @@ message CenterNet {
// vicinity of each regressed keypoint.
optional float candidate_search_scale = 15 [default = 0.3];
// One of ['min_distance', 'score_distance_ratio',
// 'score_scaled_distance_ratio', 'gaussian_weighted'] indicating how to
// select the keypoint candidate.
optional string candidate_ranking_mode = 16 [default = "min_distance"];
// The score distance ratio offset, only used if candidate_ranking_mode is
@@ -197,6 +198,28 @@ message CenterNet {
// keypoint_score / (distance + score_distance_offset)
optional float score_distance_offset = 22 [default = 1.0];
// A scalar used to multiply the bounding box size to be used as the offset
// in the score-to-distance-ratio formula. Only applicable when the
// candidate_ranking_mode is score_scaled_distance_ratio.
// The keypoint candidates are ranked using the formula:
// ranking_score = score / (distance + offset)
// where 'score' is the keypoint heatmap score, 'distance' is the distance
// between the heatmap peak location and the regressed joint location,
// 'offset' is a function of the predicted bounding box:
// offset = max(bbox height, bbox width) * score_distance_multiplier
optional float score_distance_multiplier = 28 [default = 0.1];
// A scalar used to multiply the Gaussian standard deviation to control the
// Gaussian kernel which is used to weight the candidates. Only applicable
// when the candidate_ranking_mode is gaussian_weighted.
// The keypoint candidates are ranked using the formula:
// scores * exp((-distances^2) / (2 * sigma^2))
// where 'distances' is the distance between the heatmap peak location and
// the regressed joint location and 'sigma' is the Gaussian standard
// deviation used in generating the Gaussian heatmap target multiplied by the
// 'std_dev_multiplier'.
optional float std_dev_multiplier = 29 [default = 1.0];
// The radius (in the unit of output pixel) around heatmap peak to assign
// the offset targets. If set 0, then the offset target will only be
// assigned to the heatmap peak (same behavior as the original paper).
@@ -238,6 +261,11 @@ message CenterNet {
// the number of keypoints for that class.
optional bool rescore_instances = 24 [default = false];
// A scalar used when "rescore_instances" is set to True. The detection
// score of an instance is set to be the average score among those keypoints
// with scores higher than the threshold.
optional float rescoring_threshold = 30 [default = 0.0];
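For reference, a hedged sketch of how the new fields might be set together in a keypoint task config (the enclosing message name and field placement are assumed to follow the existing keypoint estimation task; values mirror the test config above):

keypoint_estimation_task {
  # other keypoint task fields omitted
  candidate_ranking_mode: "score_scaled_distance_ratio"
  score_distance_multiplier: 11.0
  std_dev_multiplier: 2.8
  rescore_instances: true
  rescoring_threshold: 0.5
}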
// Parameters to determine the architecture of the keypoint heatmap
// prediction head.
optional PredictionHeadParams heatmap_head_params = 25;
@@ -466,3 +494,4 @@ message CenterNetFeatureExtractor {
optional string upsampling_interpolation = 11 [default = 'nearest'];
}