Commit 3ce2f61b authored by Kaushik Shivakumar

Merge branch 'master' of https://github.com/tensorflow/models into context_tf2

parents bb16d5ca 8e9296ff
@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
detection_masks: [batch, max_detections, mask_height, mask_width]
(optional)
detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch]
In addition to the above fields this stage also outputs the following
@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
def provide_groundtruth(
self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
groundtruth_dp_part_ids_list=None,
groundtruth_dp_surface_coords_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
"""Provide groundtruth tensors. """Provide groundtruth tensors.
Args: Args:
@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
`groundtruth_keypoint_visibilities_list`).
groundtruth_keypoint_visibilities_list: a list of 2-D tf.bool tensors
of shape [num_boxes, num_keypoints] containing keypoint visibilities.
groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
[num_boxes] containing the number of DensePose sampled points.
groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding.
groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, max_sampled_points, 4] containing the DensePose
surface coordinates for each sampled point. Note that there may be
padding.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.BoxListFields.keypoint_visibilities] = (
groundtruth_keypoint_visibilities_list)
if groundtruth_dp_num_points_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_num_points] = (
groundtruth_dp_num_points_list)
if groundtruth_dp_part_ids_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_part_ids] = (
groundtruth_dp_part_ids_list)
if groundtruth_dp_surface_coords_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_surface_coords] = (
groundtruth_dp_surface_coords_list)
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
...
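As a quick orientation for the new arguments, here is a minimal sketch of feeding DensePose groundtruth through this API. The `detection_model` instance and all tensor values are hypothetical; only the argument names and shapes come from the docstring above.

```python
import tensorflow as tf

# One image with two boxes; shapes follow the docstring above:
# [num_boxes], [num_boxes, max_sampled_points],
# [num_boxes, max_sampled_points, 4].
boxes = [tf.constant([[0.1, 0.1, 0.5, 0.5], [0.4, 0.4, 0.9, 0.9]])]
classes = [tf.constant([[0., 1.], [1., 0.]])]  # one-hot, num_classes = 2
dp_num_points = [tf.constant([2, 1], dtype=tf.int32)]  # valid points per box
dp_part_ids = [tf.constant([[3, 7], [0, 0]], dtype=tf.int32)]  # padded
dp_surface_coords = [tf.random.uniform([2, 2, 4])]  # (y, x, v, u) per point

# detection_model is any concrete DetectionModel subclass instance.
detection_model.provide_groundtruth(
    groundtruth_boxes_list=boxes,
    groundtruth_classes_list=classes,
    groundtruth_dp_num_points_list=dp_num_points,
    groundtruth_dp_part_ids_list=dp_part_ids,
    groundtruth_dp_surface_coords_list=dp_surface_coords)
```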
@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
return return_values
def random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=None,
keypoints=None,
scale_min=0.1,
scale_max=2.0,
output_size=512,
resize_method=tf.image.ResizeMethod.BILINEAR,
seed=None):
"""Randomly scale, crop, and then pad an image to fixed square dimensions.
Randomly scale, crop, and then pad an image to the desired square output
dimensions. Specifically, this method first samples a random_scale factor
from a uniform distribution between scale_min and scale_max, and then resizes
the image such that its maximum dimension is (output_size * random_scale).
Secondly, a square output_size crop is extracted from the resized image
(note, this will only occur when random_scale > 1.0). Lastly, the cropped
region is padded to the desired square output_size, by filling with zeros.
The augmentation is borrowed from [1]
[1]: https://arxiv.org/abs/1911.09070
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
are in normalized form meaning their coordinates vary between [0, 1]. Each
row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop boundary
are clipped to the boundary and boxes falling outside the crop are
ignored.
labels: rank 1 int32 tensor containing the object classes.
label_weights: float32 tensor of shape [num_instances] representing the
weight for each box.
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks. The masks are of the same height, width
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
scale_min: float, the minimum value for the random scale factor.
scale_max: float, the maximum value for the random scale factor.
output_size: int, the desired (square) output image size.
resize_method: tf.image.ResizeMethod, resize method to use when scaling the
input images.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
label_weights: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks. Only returned if the input `masks` is not None.
keypoints: rank 3 float32 tensor with shape [num_instances, num_keypoints,
2] in normalized coordinates. Only returned if the input `keypoints` is
not None.
"""
img_shape = tf.shape(image)
input_height, input_width = img_shape[0], img_shape[1]
random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
# Compute the scaled height and width from the random scale.
max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
# Compute the offsets:
offset_y = tf.cast(scaled_height - output_size, tf.float32)
offset_x = tf.cast(scaled_width - output_size, tf.float32)
offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
offset_y = tf.cast(offset_y, tf.int32)
offset_x = tf.cast(offset_x, tf.int32)
# Scale, crop, and pad the input image.
scaled_image = tf.image.resize_images(
image, [scaled_height, scaled_width], method=resize_method)
scaled_image = scaled_image[offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
output_size)
# Update the boxes.
new_window = tf.cast(
tf.stack([offset_y, offset_x,
offset_y + output_size, offset_x + output_size]),
dtype=tf.float32)
new_window /= tf.cast(
tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
dtype=tf.float32)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
boxlist, indices = box_list_ops.prune_completely_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
boxlist = box_list_ops.clip_to_window(
boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
return_values = [output_image, boxlist.get(),
tf.gather(labels, indices),
tf.gather(label_weights, indices)]
if masks is not None:
new_masks = tf.expand_dims(masks, -1)
new_masks = tf.image.resize_images(
new_masks, [scaled_height, scaled_width], method=resize_method)
new_masks = new_masks[:, offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, output_size, output_size)
new_masks = tf.squeeze(new_masks, [-1])
return_values.append(tf.gather(new_masks, indices))
if keypoints is not None:
keypoints = tf.gather(keypoints, indices)
keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
keypoints = keypoint_ops.prune_outside_window(
keypoints, [0.0, 0.0, 1.0, 1.0])
return_values.append(keypoints)
return return_values
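The scale/offset arithmetic above is easy to sanity-check outside TensorFlow. A small NumPy sketch (values assumed: a 512x256 input, random_scale = 1.5, output_size = 512) mirrors the resize, crop, and pad geometry of the function:

```python
import numpy as np

input_height, input_width = 512, 256
output_size, random_scale = 512, 1.5

# Resize so the *maximum* dimension equals output_size * random_scale,
# preserving aspect ratio (cf. input_ar_y / input_ar_x above).
max_dim = max(input_height, input_width)
scaled_height = int(random_scale * output_size * input_height / max_dim)  # 768
scaled_width = int(random_scale * output_size * input_width / max_dim)    # 384

# A crop offset is only possible along dimensions larger than output_size;
# here offset_y is sampled in [0, 256) while offset_x is forced to 0.
rng = np.random.default_rng(0)
offset_y = int(max(0, scaled_height - output_size) * rng.uniform())
offset_x = int(max(0, scaled_width - output_size) * rng.uniform())

# After cropping, the 384-wide strip is zero-padded back out to 512x512.
print(scaled_height, scaled_width, offset_y, offset_x)
```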
def get_default_func_arg_map(include_label_weights=True,
include_label_confidences=False,
include_multiclass_scores=False,
@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_label_confidences,
multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
random_scale_crop_and_pad_to_square:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
}
return prep_func_arg_map
...
@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_masks=True,
test_keypoints=True)
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
def graph_fn():
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints, keypoint_visibilities = self.createTestKeypoints()
dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image:
images,
fields.InputDataFields.groundtruth_boxes:
boxes,
fields.InputDataFields.groundtruth_instance_masks:
masks,
fields.InputDataFields.groundtruth_keypoints:
keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities:
keypoint_visibilities
}
if include_dense_pose:
tensor_dict.update({
fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords:
dp_surface_coords
})
preprocess_options = [(preprocessor.random_horizontal_flip, {
'keypoint_flip_permutation': keypoint_flip_permutation
})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True,
include_dense_pose=include_dense_pose)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
keypoint_visibilities = tensor_dict[
fields.InputDataFields.groundtruth_keypoint_visibilities]
output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
if include_dense_pose:
dp_num_points = tensor_dict[
fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = tensor_dict[
fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = tensor_dict[
fields.InputDataFields.groundtruth_dp_surface_coords]
output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
return output_tensors
output_tensors = self.execute_cpu(graph_fn, [])
self.assertIsNotNone(output_tensors[0]) # Boxes.
self.assertIsNotNone(output_tensors[1]) # Masks.
self.assertIsNotNone(output_tensors[2]) # Keypoints
self.assertIsNotNone(output_tensors[3]) # Keypoint Visibilities.
if include_dense_pose:
self.assertIsNotNone(output_tensors[4]) # DensePose Num Points.
self.assertIsNotNone(output_tensors[5]) # DensePose Part IDs.
self.assertIsNotNone(output_tensors[6]) # DensePose Surface Coords
def testRandomVerticalFlip(self):
@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
def graph_fn():
@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
size = max(image.shape)
self.assertAlmostEqual(scale * 256.0, size)
self.assertAllClose(image[:, :, 0], masks[0, :, :])
@parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
('scale_2_0', 2.0))
def test_random_scale_crop_and_pad_to_square(self, scale):
def graph_fn():
image = np.random.randn(512, 256, 1)
box_centers = [0.25, 0.5, 0.75]
box_size = 0.1
box_corners = []
box_labels = []
box_label_weights = []
keypoints = []
masks = []
for center_y in box_centers:
for center_x in box_centers:
box_corners.append(
[center_y - box_size / 2.0, center_x - box_size / 2.0,
center_y + box_size / 2.0, center_x + box_size / 2.0])
box_labels.append([1])
box_label_weights.append([1.])
keypoints.append(
[[center_y - box_size / 2.0, center_x - box_size / 2.0],
[center_y + box_size / 2.0, center_x + box_size / 2.0]])
masks.append(image[:, :, 0].reshape(512, 256))
image = tf.constant(image)
boxes = tf.constant(box_corners)
labels = tf.constant(box_labels)
label_weights = tf.constant(box_label_weights)
keypoints = tf.constant(keypoints)
masks = tf.constant(np.stack(masks))
(new_image, new_boxes, _, _, new_masks,
new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=masks,
keypoints=keypoints,
scale_min=scale,
scale_max=scale,
output_size=512)
return new_image, new_boxes, new_masks, new_keypoints
image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
# Since random_scale_crop_and_pad_to_square may prune and clip boxes,
# we only need to find one of the boxes that was not clipped and check
# that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
# is equivalent to round(a-b, 7) == 0.
any_box_has_correct_size = False
effective_scale_y = int(scale * 512) / 512.0
effective_scale_x = int(scale * 256) / 512.0
expected_size_y = 0.1 * effective_scale_y
expected_size_x = 0.1 * effective_scale_x
for box in boxes:
ymin, xmin, ymax, xmax = box
any_box_has_correct_size |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_box_has_correct_size)
# Similar to the approach above where we check for at least one box with the
# expected dimensions, we check for at least one pair of keypoints whose
# distance matches the expected dimensions.
any_keypoint_pair_has_correct_dist = False
for keypoint_pair in keypoints:
ymin, xmin = keypoint_pair[0]
ymax, xmax = keypoint_pair[1]
any_keypoint_pair_has_correct_dist |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_keypoint_pair_has_correct_dist)
self.assertAlmostEqual(512.0, image.shape[0])
self.assertAlmostEqual(512.0, image.shape[1])
self.assertAllClose(image[:, :, 0],
masks[0, :, :])
...
@@ -141,6 +141,8 @@ class DetectionResultFields(object):
for detection boxes in the image including background class.
detection_classes: detection-level class labels.
detection_masks: contains a segmentation mask for each detection box.
detection_surface_coords: contains DensePose surface coordinates for each
box.
detection_boundaries: contains an object boundary for each detection box.
detection_keypoints: contains detection keypoints for each detection box.
detection_keypoint_scores: contains detection keypoint scores.
@@ -161,6 +163,7 @@ class DetectionResultFields(object):
detection_features = 'detection_features'
detection_classes = 'detection_classes'
detection_masks = 'detection_masks'
detection_surface_coords = 'detection_surface_coords'
detection_boundaries = 'detection_boundaries'
detection_keypoints = 'detection_keypoints'
detection_keypoint_scores = 'detection_keypoint_scores'
@@ -182,7 +185,11 @@ class BoxListFields(object):
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoint_visibilities: keypoint visibilities per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
densepose_num_points: number of DensePose points per bounding box.
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
is_crowd: is_crowd annotation per bounding box.
"""
boxes = 'boxes'
@@ -196,6 +203,9 @@ class BoxListFields(object):
keypoints = 'keypoints'
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
densepose_num_points = 'densepose_num_points'
densepose_part_ids = 'densepose_part_ids'
densepose_surface_coords = 'densepose_surface_coords'
is_crowd = 'is_crowd'
group_of = 'group_of'
...
@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import densepose_ops
from object_detection.core import keypoint_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
function.
Args:
batch_predictions: A tensor of shape [batch_size, height, width, channels]
or [batch_size, height, width, class, channels] for class-specific
features (e.g. keypoint joint offsets).
indices: A tensor of shape [num_instances, 3] for single-class features or
[num_instances, 4] for class-specific features.
Returns:
values: A tensor of shape [num_instances, channels] holding the predicted
values at the given indices.
"""
return tf.gather_nd(batch_predictions, indices)
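Because the helper is a thin wrapper around tf.gather_nd, a short sketch with made-up shapes shows what the indices select:

```python
import tensorflow as tf

# Batch of 2 feature maps, 4x4 spatial, 2 channels (e.g. y/x offsets).
batch_predictions = tf.reshape(tf.range(2 * 4 * 4 * 2, dtype=tf.float32),
                               [2, 4, 4, 2])
# Each row is [batch, y, x]: pick (1, 2) in image 0 and (3, 0) in image 1.
indices = tf.constant([[0, 1, 2], [1, 3, 0]])

values = tf.gather_nd(batch_predictions, indices)
print(values.shape)  # (2, 2): one [y_offset, x_offset] pair per instance
```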
@@ -1657,3 +1656,118 @@ class CenterNetMaskTargetAssigner(object):
segmentation_target = tf.stack(segmentation_targets_list, axis=0)
return segmentation_target
class CenterNetDensePoseTargetAssigner(object):
"""Wrapper to compute targets for DensePose task."""
def __init__(self, stride, num_parts=24):
self._stride = stride
self._num_parts = num_parts
def assign_part_and_coordinate_targets(self,
height,
width,
gt_dp_num_points_list,
gt_dp_part_ids_list,
gt_dp_surface_coords_list,
gt_weights_list=None):
"""Returns the DensePose part_id and coordinate targets and their indices.
The returned values are expected to be used with predicted tensors
of size (batch_size, height//self._stride, width//self._stride, 2). The
predicted values at the relevant indices can be retrieved with the
get_batch_predictions_from_indices function.
Args:
height: int, height of input to the model. This is used to determine the
height of the output.
width: int, width of the input to the model. This is used to determine the
width of the output.
gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape [num_boxes]
containing the number of DensePose sampled points per box.
gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding, as
boxes may contain a different number of sampled points.
gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
[num_boxes, max_sampled_points, 4] containing the DensePose surface
coordinates (normalized) for each sampled point. Note that there may be
padding.
gt_weights_list: A list of 1-D tensors with shape [num_boxes]
corresponding to the weight of each groundtruth detection box.
Returns:
batch_indices: an integer tensor of shape [num_total_points, 4] holding
the indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively. The fourth column is the part index.
batch_part_ids: an int tensor of shape [num_total_points, num_parts]
holding 1-hot encodings of parts for each sampled point.
batch_surface_coords: a float tensor of shape [num_total_points, 2]
holding the expected (v, u) coordinates for each sampled point.
batch_weights: a float tensor of shape [num_total_points] indicating the
weight of each prediction.
Note that num_total_points = batch_size * num_boxes * max_sampled_points.
"""
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_dp_num_points_list)
batch_indices = []
batch_part_ids = []
batch_surface_coords = []
batch_weights = []
for i, (num_points, part_ids, surface_coords, weights) in enumerate(
zip(gt_dp_num_points_list, gt_dp_part_ids_list,
gt_dp_surface_coords_list, gt_weights_list)):
num_boxes, max_sampled_points = (
shape_utils.combined_static_and_dynamic_shape(part_ids))
part_ids_flattened = tf.reshape(part_ids, [-1])
part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
# Get DensePose coordinates in the output space.
surface_coords_abs = densepose_ops.to_absolute_coordinates(
surface_coords, height // self._stride, width // self._stride)
surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
# Each tensor has shape [num_boxes * max_sampled_points].
yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)
# Get the indices (in output space) for the DensePose coordinates. Note
# that if self._stride is larger than 1, this will have the effect of
# reducing spatial resolution of the groundtruth points.
indices_y = tf.cast(yabs, tf.int32)
indices_x = tf.cast(xabs, tf.int32)
# Assign ones if weights are not provided.
if weights is None:
weights = tf.ones(num_boxes, dtype=tf.float32)
# Create per-point weights.
weights_per_point = tf.reshape(
tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
shape=[-1])
# Mask out invalid (i.e. padded) DensePose points.
num_points_tiled = tf.tile(num_points[:, tf.newaxis],
multiples=[1, max_sampled_points])
range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
multiples=[num_boxes, 1])
valid_points = tf.math.less(range_tiled, num_points_tiled)
valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
weights_per_point = weights_per_point * valid_points
# Shape of [num_boxes * max_sampled_points] integer tensor filled with
# current batch index.
batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
batch_indices.append(
tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
axis=1))
batch_part_ids.append(part_ids_one_hot)
batch_surface_coords.append(tf.stack([v, u], axis=1))
batch_weights.append(weights_per_point)
batch_indices = tf.concat(batch_indices, axis=0)
batch_part_ids = tf.concat(batch_part_ids, axis=0)
batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
batch_weights = tf.concat(batch_weights, axis=0)
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
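The four outputs above are designed to be consumed through get_batch_predictions_from_indices. A minimal loss sketch, assuming (hypothetically) that the model emits per-part (v, u) regressions of shape [batch, height // stride, width // stride, num_parts, 2]; the real CenterNet head layout may differ:

```python
import tensorflow as tf

# Hypothetical prediction tensor at stride 4 for a 120x80 input, 24 parts.
surface_coord_preds = tf.zeros([2, 30, 20, 24, 2], dtype=tf.float32)

# batch_indices columns are [batch, y, x, part], so gather_nd selects the
# predicted (v, u) pair at each sampled point's location and part channel.
# batch_indices, batch_surface_coords and batch_weights come from
# assign_part_and_coordinate_targets above.
point_preds = tf.gather_nd(surface_coord_preds, batch_indices)

# Weighted L1 loss against the assigned (v, u) targets; padded points drop
# out because the assigner gives them zero weight.
loss = tf.reduce_sum(
    batch_weights[:, tf.newaxis] *
    tf.abs(point_preds - batch_surface_coords))
```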
@@ -1906,6 +1906,99 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
def test_assign_part_and_coordinate_targets(self):
def graph_fn():
gt_dp_num_points_list = [
# Example 0.
tf.constant([2, 0, 3], dtype=tf.int32),
# Example 1.
tf.constant([1, 1], dtype=tf.int32),
]
gt_dp_part_ids_list = [
# Example 0.
tf.constant([[1, 6, 0],
[0, 0, 0],
[0, 2, 3]], dtype=tf.int32),
# Example 1.
tf.constant([[7, 0, 0],
[0, 0, 0]], dtype=tf.int32),
]
gt_dp_surface_coords_list = [
# Example 0.
tf.constant(
[[[0.11, 0.2, 0.3, 0.4], # Box 0.
[0.6, 0.4, 0.1, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 0.0, 0.0], # Box 1.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.22, 0.1, 0.6, 0.8], # Box 2.
[0.0, 0.4, 0.5, 1.0],
[0.3, 0.2, 0.4, 0.1]]],
dtype=tf.float32),
# Example 1.
tf.constant(
[[[0.5, 0.5, 0.3, 1.0], # Box 0.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.2, 0.2, 0.5, 0.8], # Box 1.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]]],
dtype=tf.float32),
]
gt_weights_list = [
# Example 0.
tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
# Example 1.
tf.constant([0.0, 1.0], dtype=tf.float32),
]
cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
cn_assigner.assign_part_and_coordinate_targets(
height=120,
width=80,
gt_dp_num_points_list=gt_dp_num_points_list,
gt_dp_part_ids_list=gt_dp_part_ids_list,
gt_dp_surface_coords_list=gt_dp_surface_coords_list,
gt_weights_list=gt_weights_list))
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
self.execute(graph_fn, []))
expected_batch_indices = np.array([
# Example 0. e.g.
# The first set of indices is calculated as follows:
# floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
[0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2], [0, 9, 4, 3],
# Example 1.
[1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0], [1, 0, 0, 0],
[1, 0, 0, 0]
], dtype=np.int32)
expected_batch_part_ids = tf.one_hot(
[1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
expected_batch_surface_coords = np.array([
# Box 0.
[0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
[0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
# Box 1.
[0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0], [0.0, 0.0],
], np.float32)
expected_batch_weights = np.array([
# Box 0.
1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
# Box 1.
0.0, 0.0, 0.0, 1.0, 0.0, 0.0
], dtype=np.float32)
self.assertAllEqual(expected_batch_indices, batch_indices)
self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
self.assertAllClose(expected_batch_weights, batch_weights)
if __name__ == '__main__':
tf.enable_v2_behavior()
tf.test.main()
@@ -50,14 +50,16 @@ import io
import itertools
import json
import os
import numpy as np
import PIL.Image
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class ReKeyDataFn(beam.DoFn):
"""Re-keys tfrecords by sequence_key.
@@ -932,4 +934,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
main()
@@ -22,7 +22,7 @@ import datetime
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -31,6 +31,12 @@ from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
...
@@ -39,13 +39,16 @@ import io
import json
import logging
import os
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from object_detection.utils import dataset_util
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
@@ -338,4 +341,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
main()
@@ -22,7 +22,6 @@ import os
import tempfile
import unittest
import numpy as np
from PIL import Image
@@ -30,6 +29,11 @@ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
...
@@ -48,9 +48,11 @@ from __future__ import print_function
import argparse
import os
import threading
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateDetectionDataFn(beam.DoFn):
@@ -290,4 +292,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
main()
@@ -22,7 +22,6 @@ import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -39,6 +38,11 @@ if six.PY2:
else:
mock = unittest.mock
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
...
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory \
--additional_output_tensor_names detection_features
python generate_embedding_data.py \
--alsologtostderr \
@@ -52,13 +53,15 @@ import datetime
import os
import threading
import numpy as np
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
@@ -410,5 +413,7 @@ def main(argv=None, save_main_session=True):
p.run()
if __name__ == '__main__':
main()
@@ -21,7 +21,6 @@ import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -38,6 +37,11 @@ if six.PY2:
else:
mock = unittest.mock
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
...
@@ -51,25 +51,25 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.to_numpy()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature([
six.ensure_binary(label_text)
for label_text in filtered_data_frame_boxes.LabelName.to_numpy()
]),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame_boxes.LabelName.map(
lambda x: label_map[x]).to_numpy()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature(
six.ensure_binary('{}.jpg'.format(image_id))),
@@ -82,31 +82,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsGroupOf.to_numpy().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsOccluded.to_numpy().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsTruncated.to_numpy().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsDepiction.to_numpy().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).to_numpy())
feature_map[standard_fields.TfExampleFields
.image_class_text] = dataset_util.bytes_list_feature([
six.ensure_binary(label_text) for label_text in
filtered_data_frame_labels.LabelName.to_numpy()
]),
return tf.train.Example(features=tf.train.Features(feature=feature_map))
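The only substantive change in this file is the pandas migration: DataFrame/Series.as_matrix() was deprecated and then removed in pandas 1.0, and to_numpy() is its drop-in replacement. A tiny illustration:

```python
import pandas as pd

s = pd.Series([0.1, 0.2, 0.3], name='YMin')
# s.as_matrix()   # AttributeError on pandas >= 1.0
arr = s.to_numpy()  # array([0.1, 0.2, 0.3]), same result as the old call
```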
@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf1/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install --user -U pip
RUN python -m pip install --user .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# TensorFlow Object Detection on Docker
These instructions are experimental.
@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/tf1/Dockerfile -t od .
docker run -it od
```
@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf2/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install -U pip
RUN python -m pip install .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# TensorFlow Object Detection on Docker
These instructions are experimental.
@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/tf2/Dockerfile -t od .
docker run -it od
```
@@ -552,7 +552,11 @@ def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
# If the masks are currently float, binarize them. Otherwise keep them as
# integers, since they have already been thresholded.
if detection_masks_reframed.dtype == tf.float32:
detection_masks_reframed = tf.greater(detection_masks_reframed, 0.5)
return tf.cast(detection_masks_reframed, tf.uint8)
def _resize_groundtruth_masks(args):
@@ -570,6 +574,17 @@ def _resize_groundtruth_masks(args):
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
def _resize_surface_coordinate_masks(args):
detection_boxes, surface_coords, image_shape = args
surface_coords_v, surface_coords_u = tf.unstack(surface_coords, axis=-1)
surface_coords_v_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_v, detection_boxes, image_shape[0], image_shape[1])
surface_coords_u_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_u, detection_boxes, image_shape[0], image_shape[1])
return tf.stack([surface_coords_v_reframed, surface_coords_u_reframed],
axis=-1)
def _scale_keypoint_to_absolute(args):
keypoints, image_shape = args
return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
@@ -720,6 +735,12 @@ def result_dict_for_batched_example(images,
num_keypoints] bool tensor with keypoint visibilities (Optional).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
tensor of 1-indexed classes. (Optional)
'groundtruth_dp_num_points': [batch_size, max_number_of_boxes] int32
tensor. (Optional)
'groundtruth_dp_part_ids': [batch_size, max_number_of_boxes,
max_sampled_points] int32 tensor. (Optional)
'groundtruth_dp_surface_coords': [batch_size, max_number_of_boxes,
max_sampled_points, 4] float32 tensor. (Optional)
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
@@ -747,12 +768,16 @@ def result_dict_for_batched_example(images,
'detection_scores': [batch_size, max_detections] float32 tensor of scores.
'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
classes.
'detection_masks': [batch_size, max_detections, H, W] uint8 tensor of
instance masks, reframed to full image masks. Note that these may be
binarized (e.g. {0, 1}), or may contain 1-indexed part labels. (Optional)
'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
float32 tensor containing keypoint coordinates. (Optional)
'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
float32 tensor containing keypoint scores. (Optional)
'detection_surface_coords': [batch_size, max_detections, H, W, 2] float32
tensor with normalized surface coordinates (e.g. DensePose UV
coordinates). (Optional)
'num_detections': [batch_size] int64 tensor containing number of valid
detections.
'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
@@ -844,14 +869,21 @@ def result_dict_for_batched_example(images,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks]
output_dict[detection_fields.detection_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks,
original_image_spatial_shapes],
dtype=tf.uint8))
if detection_fields.detection_surface_coords in detections:
detection_surface_coords = detections[
detection_fields.detection_surface_coords]
output_dict[detection_fields.detection_surface_coords] = (
shape_utils.static_or_dynamic_map_fn(
_resize_surface_coordinate_masks,
elems=[detection_boxes, detection_surface_coords,
original_image_spatial_shapes],
dtype=tf.float32))
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints]
@@ -1074,3 +1106,8 @@ def evaluator_options_from_eval_config(eval_config):
'recall_upper_bound': (eval_config.recall_upper_bound)
}
return evaluator_options
def has_densepose(eval_dict):
"""Returns True if the eval_dict contains DensePose detection fields."""
return (fields.DetectionResultFields.detection_masks in eval_dict and
fields.DetectionResultFields.detection_surface_coords in eval_dict)