"graphbolt/vscode:/vscode.git/clone" did not exist on "9c36ddcd2b7a1a2e1f5b5362a379538916de23cd"
Commit bd303e0a authored by Yu-hui Chen, committed by TF Object Detection Team

Updated the tf_example_decoder to support the case where keypoint labels appear in the image/object/keypoint/text feature but not in the keypoint label map.

PiperOrigin-RevId: 404372105
parent dd933006
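
Before the diff itself, a minimal sketch of the idea the change implements, with made-up tensors (`kpts_idx`, `instance_idx`, and `coords` below are illustrative, not the decoder's actual variables): keypoint texts that are missing from the label map come back from the name-to-id lookup with a negative index, and those entries are dropped with a boolean mask before `tf.scatter_nd` instead of being scattered with an invalid index.

import tensorflow as tf

# Illustrative inputs: two instances, five keypoint slots; -1 marks a keypoint
# text that was not found in the keypoint label map.
kpts_idx = tf.constant([3, 1, -1, 2])
instance_idx = tf.constant([0, 0, 1, 1])
coords = tf.constant([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]])

# [num_entries, 2] scatter indices of (instance, keypoint slot).
full_kpt_idx = tf.stack([instance_idx, kpts_idx], axis=1)
# Keep only entries whose keypoint text exists in the label map.
valid_mask = tf.greater_equal(kpts_idx, 0)
gt_kpt = tf.scatter_nd(
    tf.boolean_mask(full_kpt_idx, valid_mask),
    tf.boolean_mask(coords, valid_mask),
    shape=[2, 5, 2])  # [num_instances, num_keypoints, 2]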
@@ -611,7 +611,6 @@ class TfExampleDecoder(data_decoder.DataDecoder):
              np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
        else:
          num_instances = tf.shape(tensor_dict['groundtruth_classes'])[0]

          def true_fn(num_instances):
            """Logics to process the tensor when num_instances is not zero."""
            kpts_idx = tf.cast(self._kpts_name_to_id_table.lookup(
@@ -625,19 +624,25 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                [1, num_kpt_texts])
            # Prepare the index of the keypoints to scatter the keypoint
            # coordinates: [num_kpts_texts * num_instances, 2].
-            kpt_idx = tf.concat([
+            full_kpt_idx = tf.concat([
                tf.reshape(
                    instance_idx, shape=[num_kpt_texts * num_instances, 1]),
                tf.expand_dims(kpts_idx, axis=-1)
            ], axis=1)
+            # Get the mask and gather only the keypoints with non-negative
+            # indices, i.e. drop keypoint labels that appear in the
+            # image/object/keypoint/text feature but do not exist in the
+            # label map.
+            valid_mask = tf.greater_equal(kpts_idx, 0)
+            full_kpt_idx = tf.boolean_mask(full_kpt_idx, valid_mask)
            gt_kpt = tf.scatter_nd(
-                kpt_idx,
-                tensor_dict[gt_kpt_fld],
+                full_kpt_idx,
+                tf.boolean_mask(tensor_dict[gt_kpt_fld], valid_mask),
                shape=[num_instances, self._num_keypoints, 2])
            gt_kpt_vis = tf.cast(tf.scatter_nd(
-                kpt_idx,
-                tensor_dict[gt_kpt_vis_fld],
+                full_kpt_idx,
+                tf.boolean_mask(tensor_dict[gt_kpt_vis_fld], valid_mask),
                shape=[num_instances, self._num_keypoints]), dtype=tf.bool)
            visibilities_tiled = tf.tile(
                tf.expand_dims(gt_kpt_vis, axis=-1), [1, 1, 2])

@@ -1091,3 +1096,4 @@ class TfExampleDecoder(data_decoder.DataDecoder):
    new_object_field = tf.repeat(
        object_field, tf.reduce_sum(expanded_indices, axis=1), axis=0)
    return new_object_field
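
The negative indices filtered above come from the keypoint name-to-id lookup: names that the label map does not define fall through to the table's default value, presumably -1 given the `tf.greater_equal(kpts_idx, 0)` check. Below is a standalone sketch of that behavior only; the table is a hypothetical stand-in, not how `_kpts_name_to_id_table` is actually built from the label map proto. The test file diff (TfExampleDecoderTest) follows.

import tensorflow as tf

# Hypothetical stand-in for the decoder's keypoint name-to-id table.
kpt_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=['missing_part', 'left_eye', 'right_eye', 'nose'],
        values=[0, 1, 2, 3]),
    default_value=-1)

ids = kpt_table.lookup(tf.constant(['nose', 'left_ear', 'right_eye']))
print(ids.numpy())  # [ 3 -1  2]: 'left_ear' is absent from the map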
@@ -620,6 +620,116 @@ class TfExampleDecoderTest(test_case.TestCase):
    self.assertAllEqual(
        np.zeros([2, 2], dtype=np.bool), output[gt_kpts_vis_fld][:, 3:])

+  def testDecodeKeypointWithKptsLabelsNotInText(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg, _ = self._create_encoded_and_decoded_data(
+        image_tensor, 'jpeg')
+    bbox_classes = [0, 1]
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+    keypoint_visibility = [1, 2, 0, 1, 0, 2]
+    keypoint_texts = [
+        six.b('nose'), six.b('left_eye'), six.b('right_eye'), six.b('nose'),
+        six.b('left_eye'), six.b('right_eye')
+    ]
+    label_map_string = """
+      item: {
+        id: 1
+        name: 'face'
+        display_name: 'face'
+        keypoints {
+          id: 0
+          label: "missing_part"
+        }
+        keypoints {
+          id: 2
+          label: "right_eye"
+        }
+        keypoints {
+          id: 3
+          label: "nose"
+        }
+      }
+      item: {
+        id: 2
+        name: 'person'
+        display_name: 'person'
+        keypoints {
+          id: 1
+          label: "left_eye"
+        }
+      }
+    """
+    label_map_proto_file = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+    with tf.gfile.Open(label_map_proto_file, 'wb') as f:
+      f.write(label_map_string)
+
+    def graph_fn():
+      example = tf.train.Example(
+          features=tf.train.Features(
+              feature={
+                  'image/encoded':
+                      dataset_util.bytes_feature(encoded_jpeg),
+                  'image/format':
+                      dataset_util.bytes_feature(six.b('jpeg')),
+                  'image/object/bbox/ymin':
+                      dataset_util.float_list_feature(bbox_ymins),
+                  'image/object/bbox/xmin':
+                      dataset_util.float_list_feature(bbox_xmins),
+                  'image/object/bbox/ymax':
+                      dataset_util.float_list_feature(bbox_ymaxs),
+                  'image/object/bbox/xmax':
+                      dataset_util.float_list_feature(bbox_xmaxs),
+                  'image/object/keypoint/y':
+                      dataset_util.float_list_feature(keypoint_ys),
+                  'image/object/keypoint/x':
+                      dataset_util.float_list_feature(keypoint_xs),
+                  'image/object/keypoint/visibility':
+                      dataset_util.int64_list_feature(keypoint_visibility),
+                  'image/object/keypoint/text':
+                      dataset_util.bytes_list_feature(keypoint_texts),
+                  'image/object/class/label':
+                      dataset_util.int64_list_feature(bbox_classes),
+              })).SerializeToString()
+      example_decoder = tf_example_decoder.TfExampleDecoder(
+          label_map_proto_file=label_map_proto_file, num_keypoints=5,
+          use_keypoint_label_map=True)
+      output = example_decoder.decode(tf.convert_to_tensor(example))
+      self.assertAllEqual((output[
+          fields.InputDataFields.groundtruth_boxes].get_shape().as_list()),
+                          [None, 4])
+      self.assertAllEqual((output[
+          fields.InputDataFields.groundtruth_keypoints].get_shape().as_list()),
+                          [None, 5, 2])
+      return output
+
+    output = self.execute_cpu(graph_fn, [])
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+                                bbox_xmaxs]).transpose()
+    self.assertAllEqual(expected_boxes,
+                        output[fields.InputDataFields.groundtruth_boxes])
+    expected_keypoints = [[[np.nan, np.nan], [1., 2.], [np.nan, np.nan],
+                           [0., 1.], [np.nan, np.nan]],
+                          [[np.nan, np.nan], [np.nan, np.nan], [5., 6.],
+                           [3., 4.], [np.nan, np.nan]]]
+    self.assertAllClose(expected_keypoints,
+                        output[fields.InputDataFields.groundtruth_keypoints])
+    expected_visibility = [[False, True, False, True, False],
+                           [False, False, True, True, False]]
+    gt_kpts_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
+    self.assertAllEqual(expected_visibility, output[gt_kpts_vis_fld])
+
  def testDecodeKeypointNoVisibilities(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg, _ = self._create_encoded_and_decoded_data(
...
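
As a cross-check of the `expected_keypoints` and `expected_visibility` literals in the new test, the same values can be rebuilt with plain NumPy. The slot order 0..4 = (missing_part, left_eye, right_eye, nose, unused) follows the keypoint ids in the test's label map; that an input visibility of 0 decodes to NaN coordinates and False visibility is inferred from the expected output rather than stated in the diff.

import numpy as np

slot = {'nose': 3, 'left_eye': 1, 'right_eye': 2}  # ids from the test label map
texts = ['nose', 'left_eye', 'right_eye', 'nose', 'left_eye', 'right_eye']
ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
vis = [1, 2, 0, 1, 0, 2]

keypoints = np.full((2, 5, 2), np.nan)   # [num_instances, num_keypoints, 2]
visibility = np.zeros((2, 5), dtype=bool)
for i, (t, y, x, v) in enumerate(zip(texts, ys, xs, vis)):
  if v > 0:  # visibility 0 is treated as "not labeled"
    keypoints[i // 3, slot[t]] = [y, x]
    visibility[i // 3, slot[t]] = True

print(keypoints)    # matches expected_keypoints
print(visibility)   # matches expected_visibility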