Commit 0a17932d authored by Yu-hui Chen's avatar Yu-hui Chen Committed by TF Object Detection Team
Browse files

Removed the dependency between the keypoint task and the object detection
task in the postprocessing logic.

PiperOrigin-RevId: 362814113
parent 2e77bb3e
......@@ -125,22 +125,24 @@ def change_coordinate_frame(keypoints, window, scope=None):
return new_keypoints
def keypoints_to_enclosing_bounding_boxes(keypoints, keypoints_axis=1):
  """Creates enclosing bounding boxes from keypoints.

  Args:
    keypoints: a [num_instances, num_keypoints, 2] float32 tensor with
      keypoints in [y, x] format. A leading batch dimension is also supported
      (e.g. [batch, num_instances, num_keypoints, 2] with keypoints_axis=2).
    keypoints_axis: An integer indicating the axis that corresponds to the
      keypoint dimension.

  Returns:
    A [num_instances, 4] float32 tensor (in [ymin, xmin, ymax, xmax] order)
    that tightly covers all the keypoints for each instance.
  """
  # Use `...` indexing rather than `[:, :, i]` so the reduction works for any
  # rank of input; only `keypoints_axis` determines which axis is collapsed.
  ymin = tf.math.reduce_min(keypoints[..., 0], axis=keypoints_axis)
  xmin = tf.math.reduce_min(keypoints[..., 1], axis=keypoints_axis)
  ymax = tf.math.reduce_max(keypoints[..., 0], axis=keypoints_axis)
  xmax = tf.math.reduce_max(keypoints[..., 1], axis=keypoints_axis)
  return tf.stack([ymin, xmin, ymax, xmax], axis=keypoints_axis)
def to_normalized_coordinates(keypoints, height, width,
......
......@@ -116,6 +116,35 @@ class KeypointOpsTest(test_case.TestCase):
])
self.assertAllClose(expected_bboxes, output)
def test_keypoints_to_enclosing_bounding_boxes_axis2(self):
  """Checks enclosing-box creation when the keypoint axis is 2 (batched)."""

  def graph_fn():
    # Two instances with three [y, x] keypoints each.
    instance_keypoints = tf.constant(
        [
            [  # Instance 0.
                [5., 10.],
                [3., 20.],
                [8., 4.],
            ],
            [  # Instance 1.
                [2., 12.],
                [0., 3.],
                [5., 19.],
            ],
        ], dtype=tf.float32)
    # Duplicate along a new leading batch dimension: [2, 2, 3, 2].
    batched_keypoints = tf.stack(
        [instance_keypoints, instance_keypoints], axis=0)
    return keypoint_ops.keypoints_to_enclosing_bounding_boxes(
        batched_keypoints, keypoints_axis=2)

  output = self.execute(graph_fn, [])
  # Both batch entries hold identical keypoints, so both produce the same
  # [ymin, xmin, ymax, xmax] boxes.
  expected_bboxes = np.array(
      [
          [3., 4., 8., 20.],
          [0., 3., 5., 19.]
      ])
  self.assertAllClose(expected_bboxes, output[0])
  self.assertAllClose(expected_bboxes, output[1])
def test_to_normalized_coordinates(self):
def graph_fn():
keypoints = tf.constant([
......
......@@ -329,20 +329,15 @@ def top_k_feature_map_locations(feature_map, max_pool_kernel_size=3, k=100,
return scores, y_indices, x_indices, channel_indices
def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices,
channel_indices, height_width_predictions,
def prediction_tensors_to_boxes(y_indices, x_indices, height_width_predictions,
offset_predictions):
"""Converts CenterNet class-center, offset and size predictions to boxes.
Args:
detection_scores: A [batch, num_boxes] float32 tensor with detection
scores in range [0, 1].
y_indices: A [batch, num_boxes] int32 tensor with y indices corresponding to
object center locations (expressed in output coordinate frame).
x_indices: A [batch, num_boxes] int32 tensor with x indices corresponding to
object center locations (expressed in output coordinate frame).
channel_indices: A [batch, num_boxes] int32 tensor with channel indices
corresponding to object classes.
height_width_predictions: A float tensor of shape [batch_size, height,
width, 2] representing the height and width of a box centered at each
pixel.
......@@ -353,13 +348,6 @@ def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices,
Returns:
detection_boxes: A tensor of shape [batch_size, num_boxes, 4] holding the
the raw bounding box coordinates of boxes.
detection_classes: An integer tensor of shape [batch_size, num_boxes]
indicating the predicted class for each box.
detection_scores: A float tensor of shape [batch_size, num_boxes] indicating
the score for each box.
num_detections: An integer tensor of shape [batch_size,] indicating the
number of boxes detected for each sample in the batch.
"""
batch_size, num_boxes = _get_shape(y_indices, 2)
......@@ -383,16 +371,12 @@ def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices,
heights, widths = tf.unstack(height_width, axis=2)
y_offsets, x_offsets = tf.unstack(offsets, axis=2)
detection_classes = channel_indices
num_detections = tf.reduce_sum(tf.to_int32(detection_scores > 0), axis=1)
boxes = tf.stack([y_indices + y_offsets - heights / 2.0,
x_indices + x_offsets - widths / 2.0,
y_indices + y_offsets + heights / 2.0,
x_indices + x_offsets + widths / 2.0], axis=2)
return boxes, detection_classes, detection_scores, num_detections
return boxes
def prediction_tensors_to_temporal_offsets(
......@@ -753,7 +737,8 @@ def refine_keypoints(regressed_keypoints,
candidate_search_scale=0.3,
candidate_ranking_mode='min_distance',
score_distance_offset=1e-6,
keypoint_depth_candidates=None):
keypoint_depth_candidates=None,
keypoint_score_threshold=0.1):
"""Refines regressed keypoints by snapping to the nearest candidate keypoints.
The initial regressed keypoints represent a full set of keypoints regressed
......@@ -817,6 +802,8 @@ def refine_keypoints(regressed_keypoints,
keypoint_depth_candidates: (optional) A float tensor of shape
[batch_size, max_candidates, num_keypoints] indicating the depths for
keypoint candidates.
keypoint_score_threshold: float, The heatmap score threshold for
a keypoint to become a valid candidate.
Returns:
A tuple with:
......@@ -903,42 +890,40 @@ def refine_keypoints(regressed_keypoints,
keypoint_depth_candidates))
if bboxes is None:
# Create bboxes from regressed keypoints.
# Shape [batch_size * num_instances, 4].
regressed_keypoints_flattened = tf.reshape(
regressed_keypoints, [-1, num_keypoints, 2])
bboxes_flattened = keypoint_ops.keypoints_to_enclosing_bounding_boxes(
regressed_keypoints_flattened)
# Filter out the chosen candidate with score lower than unmatched
# keypoint score.
mask = tf.cast(nearby_candidate_scores <
keypoint_score_threshold, tf.int32)
else:
bboxes_flattened = tf.reshape(bboxes, [-1, 4])
# Scale the bounding boxes.
# Shape [batch_size, num_instances, 4].
boxlist = box_list.BoxList(bboxes_flattened)
boxlist_scaled = box_list_ops.scale_height_width(
boxlist, box_scale, box_scale)
bboxes_scaled = boxlist_scaled.get()
bboxes = tf.reshape(bboxes_scaled, [batch_size, num_instances, 4])
# Get ymin, xmin, ymax, xmax bounding box coordinates, tiled per keypoint.
# Shape [batch_size, num_instances, num_keypoints].
bboxes_tiled = tf.tile(tf.expand_dims(bboxes, 2), [1, 1, num_keypoints, 1])
ymin, xmin, ymax, xmax = tf.unstack(bboxes_tiled, axis=3)
# Produce a mask that indicates whether the original regressed keypoint
# should be used instead of a candidate keypoint.
# Shape [batch_size, num_instances, num_keypoints].
search_radius = (
tf.math.maximum(ymax - ymin, xmax - xmin) * candidate_search_scale)
mask = (tf.cast(nearby_candidate_coords[:, :, :, 0] < ymin, tf.int32) +
tf.cast(nearby_candidate_coords[:, :, :, 0] > ymax, tf.int32) +
tf.cast(nearby_candidate_coords[:, :, :, 1] < xmin, tf.int32) +
tf.cast(nearby_candidate_coords[:, :, :, 1] > xmax, tf.int32) +
# Filter out the chosen candidate with score lower than unmatched
# keypoint score.
tf.cast(nearby_candidate_scores <
unmatched_keypoint_score, tf.int32) +
tf.cast(min_distances > search_radius, tf.int32))
# Scale the bounding boxes.
# Shape [batch_size, num_instances, 4].
boxlist = box_list.BoxList(bboxes_flattened)
boxlist_scaled = box_list_ops.scale_height_width(
boxlist, box_scale, box_scale)
bboxes_scaled = boxlist_scaled.get()
bboxes = tf.reshape(bboxes_scaled, [batch_size, num_instances, 4])
# Get ymin, xmin, ymax, xmax bounding box coordinates, tiled per keypoint.
# Shape [batch_size, num_instances, num_keypoints].
bboxes_tiled = tf.tile(tf.expand_dims(bboxes, 2), [1, 1, num_keypoints, 1])
ymin, xmin, ymax, xmax = tf.unstack(bboxes_tiled, axis=3)
# Produce a mask that indicates whether the original regressed keypoint
# should be used instead of a candidate keypoint.
# Shape [batch_size, num_instances, num_keypoints].
search_radius = (
tf.math.maximum(ymax - ymin, xmax - xmin) * candidate_search_scale)
mask = (tf.cast(nearby_candidate_coords[:, :, :, 0] < ymin, tf.int32) +
tf.cast(nearby_candidate_coords[:, :, :, 0] > ymax, tf.int32) +
tf.cast(nearby_candidate_coords[:, :, :, 1] < xmin, tf.int32) +
tf.cast(nearby_candidate_coords[:, :, :, 1] > xmax, tf.int32) +
# Filter out the chosen candidate with score lower than unmatched
# keypoint score.
tf.cast(nearby_candidate_scores <
keypoint_score_threshold, tf.int32) +
tf.cast(min_distances > search_radius, tf.int32))
mask = mask > 0
# Create refined keypoints where candidate keypoints replace original
......@@ -3289,24 +3274,30 @@ class CenterNetMetaArch(model.DetectionModel):
k=self._center_params.max_box_predictions))
multiclass_scores = tf.gather_nd(
object_center_prob, tf.stack([y_indices, x_indices], -1), batch_dims=1)
boxes_strided, classes, scores, num_detections = (
prediction_tensors_to_boxes(
detection_scores, y_indices, x_indices, channel_indices,
prediction_dict[BOX_SCALE][-1], prediction_dict[BOX_OFFSET][-1]))
boxes = convert_strided_predictions_to_normalized_boxes(
boxes_strided, self._stride, true_image_shapes)
num_detections = tf.reduce_sum(tf.to_int32(detection_scores > 0), axis=1)
postprocess_dict = {
fields.DetectionResultFields.detection_boxes: boxes,
fields.DetectionResultFields.detection_scores: scores,
fields.DetectionResultFields.detection_scores: detection_scores,
fields.DetectionResultFields.detection_multiclass_scores:
multiclass_scores,
fields.DetectionResultFields.detection_classes: classes,
fields.DetectionResultFields.detection_classes: channel_indices,
fields.DetectionResultFields.num_detections: num_detections,
'detection_boxes_strided': boxes_strided
}
if self._od_params:
boxes_strided = (
prediction_tensors_to_boxes(y_indices, x_indices,
prediction_dict[BOX_SCALE][-1],
prediction_dict[BOX_OFFSET][-1]))
boxes = convert_strided_predictions_to_normalized_boxes(
boxes_strided, self._stride, true_image_shapes)
postprocess_dict.update({
fields.DetectionResultFields.detection_boxes: boxes,
'detection_boxes_strided': boxes_strided
})
if self._kp_params_dict:
# If the model is trained to predict only one class of object and its
# keypoint, we fall back to a simpler postprocessing function which uses
......@@ -3315,7 +3306,7 @@ class CenterNetMetaArch(model.DetectionModel):
if len(self._kp_params_dict) == 1 and self._num_classes == 1:
(keypoints, keypoint_scores,
keypoint_depths) = self._postprocess_keypoints_single_class(
prediction_dict, classes, y_indices, x_indices, boxes_strided,
prediction_dict, channel_indices, y_indices, x_indices, None,
num_detections)
keypoints, keypoint_scores = (
convert_strided_predictions_to_normalized_keypoints(
......@@ -3334,21 +3325,30 @@ class CenterNetMetaArch(model.DetectionModel):
for kp_dict in self._kp_params_dict.values()
])
keypoints, keypoint_scores = self._postprocess_keypoints_multi_class(
prediction_dict, classes, y_indices, x_indices,
boxes_strided, num_detections)
prediction_dict, channel_indices, y_indices, x_indices,
None, num_detections)
keypoints, keypoint_scores = (
convert_strided_predictions_to_normalized_keypoints(
keypoints, keypoint_scores, self._stride, true_image_shapes,
clip_out_of_frame_keypoints=clip_keypoints))
# Update instance scores based on keypoints.
scores = self._rescore_instances(classes, scores, keypoint_scores)
scores = self._rescore_instances(
channel_indices, detection_scores, keypoint_scores)
postprocess_dict.update({
fields.DetectionResultFields.detection_scores: scores,
fields.DetectionResultFields.detection_keypoints: keypoints,
fields.DetectionResultFields.detection_keypoint_scores:
keypoint_scores
})
if self._od_params is None:
# Still output the box prediction by enclosing the keypoints for
# evaluation purpose.
boxes = keypoint_ops.keypoints_to_enclosing_bounding_boxes(
keypoints, keypoints_axis=2)
postprocess_dict.update({
fields.DetectionResultFields.detection_boxes: boxes,
})
if self._mask_params:
masks = tf.nn.sigmoid(prediction_dict[SEGMENTATION_HEATMAP][-1])
......@@ -3360,7 +3360,7 @@ class CenterNetMetaArch(model.DetectionModel):
densepose_class_index = self._densepose_params.class_id
instance_masks, surface_coords = (
convert_strided_predictions_to_instance_masks(
boxes, classes, masks, true_image_shapes,
boxes, channel_indices, masks, true_image_shapes,
densepose_part_heatmap, densepose_surface_coords,
stride=self._stride, mask_height=self._mask_params.mask_height,
mask_width=self._mask_params.mask_width,
......@@ -3601,7 +3601,7 @@ class CenterNetMetaArch(model.DetectionModel):
"""
total_num_keypoints = sum(len(kp_dict.keypoint_indices) for kp_dict
in self._kp_params_dict.values())
batch_size, max_detections, _ = _get_shape(boxes, 3)
batch_size, max_detections = _get_shape(classes, 2)
kpt_coords_for_example_list = []
kpt_scores_for_example_list = []
for ex_ind in range(batch_size):
......@@ -3626,7 +3626,10 @@ class CenterNetMetaArch(model.DetectionModel):
# Gather the feature map locations corresponding to the object class.
y_indices_for_kpt_class = tf.gather(y_indices, instance_inds, axis=1)
x_indices_for_kpt_class = tf.gather(x_indices, instance_inds, axis=1)
boxes_for_kpt_class = tf.gather(boxes, instance_inds, axis=1)
if boxes is None:
boxes_for_kpt_class = None
else:
boxes_for_kpt_class = tf.gather(boxes, instance_inds, axis=1)
# Postprocess keypoints and scores for class and single image. Shapes
# are [1, num_instances_i, num_keypoints_i, 2] and
......@@ -3735,7 +3738,7 @@ class CenterNetMetaArch(model.DetectionModel):
keypoint_depth_predictions = prediction_dict[get_keypoint_name(
task_name, KEYPOINT_DEPTH)][-1]
batch_size, _, _ = _get_shape(boxes, 3)
batch_size, _ = _get_shape(classes, 2)
kpt_coords_for_example_list = []
kpt_scores_for_example_list = []
kpt_depths_for_example_list = []
......@@ -3863,7 +3866,10 @@ class CenterNetMetaArch(model.DetectionModel):
...]
y_indices = y_indices[batch_index:batch_index+1, ...]
x_indices = x_indices[batch_index:batch_index+1, ...]
boxes_slice = boxes[batch_index:batch_index+1, ...]
if boxes is None:
boxes_slice = None
else:
boxes_slice = boxes[batch_index:batch_index+1, ...]
# Gather the regressed keypoints. Final tensor has shape
# [1, num_instances, num_keypoints, 2].
......@@ -3901,7 +3907,9 @@ class CenterNetMetaArch(model.DetectionModel):
candidate_search_scale=kp_params.candidate_search_scale,
candidate_ranking_mode=kp_params.candidate_ranking_mode,
score_distance_offset=kp_params.score_distance_offset,
keypoint_depth_candidates=keypoint_depth_candidates)
keypoint_depth_candidates=keypoint_depth_candidates,
keypoint_score_threshold=(
kp_params.keypoint_candidate_score_threshold))
return refined_keypoints, refined_scores, refined_depths
......
......@@ -25,6 +25,7 @@ import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.builders import post_processing_builder
from object_detection.core import keypoint_ops
from object_detection.core import losses
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
......@@ -588,18 +589,15 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
hw_pred_tensor = tf.constant(hw_pred)
offset_pred_tensor = tf.constant(offset_pred)
detection_scores, y_indices, x_indices, channel_indices = (
_, y_indices, x_indices, _ = (
cnma.top_k_feature_map_locations(
class_pred_tensor, max_pool_kernel_size=3, k=2))
boxes, classes, scores, num_dets = cnma.prediction_tensors_to_boxes(
detection_scores, y_indices, x_indices, channel_indices,
hw_pred_tensor, offset_pred_tensor)
return boxes, classes, scores, num_dets
boxes, classes, scores, num_dets = self.execute(graph_fn, [])
boxes = cnma.prediction_tensors_to_boxes(
y_indices, x_indices, hw_pred_tensor, offset_pred_tensor)
return boxes
np.testing.assert_array_equal(num_dets, [2, 2, 2])
boxes = self.execute(graph_fn, [])
np.testing.assert_allclose(
[[-9, -8, 31, 52], [25, 35, 75, 85]], boxes[0])
......@@ -608,14 +606,6 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np.testing.assert_allclose(
[[69.5, 74.5, 90.5, 99.5], [40, 75, 80, 105]], boxes[2])
np.testing.assert_array_equal(classes[0], [1, 0])
np.testing.assert_array_equal(classes[1], [2, 1])
np.testing.assert_array_equal(classes[2], [0, 4])
np.testing.assert_allclose(scores[0], [.7, .55])
np.testing.assert_allclose(scores[1][:1], [.9])
np.testing.assert_allclose(scores[2], [1., .8])
def test_offset_prediction(self):
class_pred = np.zeros((3, 128, 128, 5), dtype=np.float32)
......@@ -1068,12 +1058,19 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_scores = tf.constant(keypoint_scores_np, dtype=tf.float32)
num_keypoint_candidates = tf.constant(num_keypoints_candidates_np,
dtype=tf.int32)
# The behavior of bboxes=None is different now. We provide the bboxes
# explicitly by using the regressed keypoints to create the same
# behavior.
regressed_keypoints_flattened = tf.reshape(
regressed_keypoints, [-1, 3, 2])
bboxes_flattened = keypoint_ops.keypoints_to_enclosing_bounding_boxes(
regressed_keypoints_flattened)
(refined_keypoints, refined_scores, _) = cnma.refine_keypoints(
regressed_keypoints,
keypoint_candidates,
keypoint_scores,
num_keypoint_candidates,
bboxes=None,
bboxes=bboxes_flattened,
unmatched_keypoint_score=unmatched_keypoint_score,
box_scale=1.2,
candidate_search_scale=0.3,
......@@ -1144,6 +1141,85 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints)
np.testing.assert_allclose(expected_refined_scores, refined_scores)
def test_refine_keypoints_without_bbox(self):
  """Tests keypoint refinement with bboxes=None (no box-based filtering)."""
  regressed_keypoints_np = np.array(
      [
          # Example 0.
          [
              [[2.0, 2.0], [6.0, 10.0], [14.0, 7.0]],  # Instance 0.
              [[0.0, 6.0], [3.0, 3.0], [5.0, 7.0]],  # Instance 1.
          ],
      ], dtype=np.float32)
  keypoint_candidates_np = np.array(
      [
          # Example 0.
          [
              [[2.0, 2.5], [6.0, 10.5], [4.0, 7.0]],  # Candidate 0.
              [[1.0, 8.0], [0.0, 0.0], [2.0, 2.0]],  # Candidate 1.
              [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],  # Candidate 2.
          ],
      ], dtype=np.float32)
  keypoint_scores_np = np.array(
      [
          # Example 0.
          [
              [0.8, 0.9, 1.0],  # Candidate 0.
              [0.6, 0.1, 0.9],  # Candidate 1.
              [0.0, 0.0, 0.0],  # Candidate 2.
          ],
      ], dtype=np.float32)
  num_keypoints_candidates_np = np.array(
      [
          # Example 0.
          [2, 2, 2],
      ], dtype=np.int32)
  unmatched_keypoint_score = 0.1

  def graph_fn():
    regressed_keypoints = tf.constant(
        regressed_keypoints_np, dtype=tf.float32)
    keypoint_candidates = tf.constant(
        keypoint_candidates_np, dtype=tf.float32)
    keypoint_scores = tf.constant(keypoint_scores_np, dtype=tf.float32)
    num_keypoint_candidates = tf.constant(num_keypoints_candidates_np,
                                          dtype=tf.int32)
    # bboxes=None exercises the path where no enclosing boxes are derived,
    # so candidates outside any box are NOT filtered out.
    (refined_keypoints, refined_scores, _) = cnma.refine_keypoints(
        regressed_keypoints,
        keypoint_candidates,
        keypoint_scores,
        num_keypoint_candidates,
        bboxes=None,
        unmatched_keypoint_score=unmatched_keypoint_score,
        box_scale=1.2,
        candidate_search_scale=0.3,
        candidate_ranking_mode='min_distance')
    return refined_keypoints, refined_scores

  refined_keypoints, refined_scores = self.execute(graph_fn, [])

  # The expected refined keypoints pick the ones that are closest to the
  # regressed keypoint locations without filtering out the candidates which
  # are outside of the bounding box.
  expected_refined_keypoints = np.array(
      [
          # Example 0.
          [
              [[2.0, 2.5], [6.0, 10.5], [4.0, 7.0]],  # Instance 0.
              [[1.0, 8.0], [0.0, 0.0], [4.0, 7.0]],  # Instance 1.
          ],
      ], dtype=np.float32)
  expected_refined_scores = np.array(
      [
          # Example 0.
          [
              [0.8, 0.9, 1.0],  # Instance 0.
              [0.6, 0.1, 1.0],  # Instance 1.
          ],
      ], dtype=np.float32)

  np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints)
  np.testing.assert_allclose(expected_refined_scores, refined_scores)
@parameterized.parameters({'predict_depth': True}, {'predict_depth': False})
def test_refine_keypoints_with_bboxes(self, predict_depth):
regressed_keypoints_np = np.array(
......@@ -1489,7 +1565,8 @@ def build_center_net_meta_arch(build_resnet=False,
per_keypoint_offset=False,
predict_depth=False,
per_keypoint_depth=False,
peak_radius=0):
peak_radius=0,
keypoint_only=False):
"""Builds the CenterNet meta architecture."""
if build_resnet:
feature_extractor = (
......@@ -1522,7 +1599,23 @@ def build_center_net_meta_arch(build_resnet=False,
non_max_suppression_fn, _ = post_processing_builder.build(
post_processing_proto)
if detection_only:
if keypoint_only:
num_candidates_per_keypoint = 100 if max_box_predictions > 1 else 1
return cnma.CenterNetMetaArch(
is_training=True,
add_summaries=False,
num_classes=num_classes,
feature_extractor=feature_extractor,
image_resizer_fn=image_resizer_fn,
object_center_params=get_fake_center_params(max_box_predictions),
keypoint_params_dict={
_TASK_NAME:
get_fake_kp_params(num_candidates_per_keypoint,
per_keypoint_offset, predict_depth,
per_keypoint_depth, peak_radius)
},
non_max_suppression_fn=non_max_suppression_fn)
elif detection_only:
return cnma.CenterNetMetaArch(
is_training=True,
add_summaries=False,
......@@ -1825,7 +1918,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
class_center = np.zeros((1, 32, 32, 10), dtype=np.float32)
height_width = np.zeros((1, 32, 32, 2), dtype=np.float32)
offset = np.zeros((1, 32, 32, 2), dtype=np.float32)
keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32)
keypoint_heatmaps = np.ones(
(1, 32, 32, num_keypoints), dtype=np.float32) * _logit(0.001)
keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2)
......@@ -1971,6 +2065,66 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
detections['detection_surface_coords'][0, 0, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))
def test_postprocess_kpts_no_od(self):
  """Tests postprocess with a keypoint-only model (no object-detection task).

  The model is built with keypoint_only=True, so boxes are not predicted by
  an OD head; the keypoint path must still produce valid detection outputs.
  """
  target_class_id = 1
  model = build_center_net_meta_arch(keypoint_only=True)
  max_detection = model._center_params.max_box_predictions
  num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices)

  class_center = np.zeros((1, 32, 32, 10), dtype=np.float32)
  keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32)
  keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
  keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2)

  # One strong object center at (16, 16) for the target class.
  class_probs = np.ones(10) * _logit(0.25)
  class_probs[target_class_id] = _logit(0.75)
  class_center[0, 16, 16] = class_probs
  keypoint_regression[0, 16, 16] = [
      -1., -1.,
      -1., 1.,
      1., -1.,
      1., 1.]
  keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9)
  keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9)
  keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9)
  keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05)  # Note the low score.

  class_center = tf.constant(class_center)
  keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32)
  keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
  keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)

  prediction_dict = {
      cnma.OBJECT_CENTER: [class_center],
      cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP):
          [keypoint_heatmaps],
      cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET):
          [keypoint_offsets],
      cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
          [keypoint_regression],
  }

  detections = model.postprocess(prediction_dict,
                                 tf.constant([[128, 128, 3]]))

  self.assertAllClose(detections['detection_scores'][0],
                      [.75, .5, .5, .5, .5])
  expected_multiclass_scores = [.25] * 10
  expected_multiclass_scores[target_class_id] = .75
  self.assertAllClose(expected_multiclass_scores,
                      detections['detection_multiclass_scores'][0][0])
  self.assertEqual(detections['detection_classes'][0, 0], target_class_id)
  self.assertEqual(detections['num_detections'], [5])
  self.assertAllEqual([1, max_detection, num_keypoints, 2],
                      detections['detection_keypoints'].shape)
  self.assertAllEqual([1, max_detection, num_keypoints],
                      detections['detection_keypoint_scores'].shape)
def test_non_max_suppression(self):
"""Tests application of NMS on CenterNet detections."""
target_class_id = 1
......@@ -2149,7 +2303,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
class_center = np.zeros((1, 32, 32, 1), dtype=np.float32)
height_width = np.zeros((1, 32, 32, 2), dtype=np.float32)
offset = np.zeros((1, 32, 32, 2), dtype=np.float32)
keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32)
keypoint_heatmaps = np.ones(
(1, 32, 32, num_keypoints), dtype=np.float32) * _logit(0.001)
keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment