Unverified Commit 52bb4ab1 authored by vivek rathod, committed by GitHub

Merged commit includes the following changes: (#8803)



320117767  by ronnyvotel:

    DensePose postprocessing implementation.

--
320065853  by ronnyvotel:

    Updating how masks are reframed so that reframing works on both float and uint8 masks.

--
320061717  by yuhuic:

    Updated CenterNet restore_from_objects to allow the model to load the
    checkpoints saved during training.

--
319835172  by ronnyvotel:

    Updating how the DensePose UV Symmetries MAT file path is constructed and loaded.

--
319834678  by ronnyvotel:

    First update to CenterNetMetaArch for DensePose. Adding prediction and loss functionality.

--
319810261  by rathodv:

    Create a setup.py file to simplify installation.

    Usage:
    "python object_detection/packages/tf1/setup.py install" for TF1.
    "python object_detection/packages/tf2/setup.py install" for TF2.

    or to create source distribution
    "python object_detection/packages/tf1/setup.py sdist" for TF1.
    "python object_detection/packages/tf2/setup.py sdist" for TF2.

--
319803041  by sbeery:

    Updating documentation for export

--
319688087  by rathodv:

    Update as_matrix() to to_numpy() to avoid failures with Python 3.6.
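
    The pandas change in a nutshell (illustrative snippet; `.as_matrix()` was
    deprecated and later removed from pandas, while `.to_numpy()` is the
    supported accessor):

        import pandas as pd

        s = pd.Series([0.1, 0.2, 0.3])
        arr = s.to_numpy()  # replaces the removed s.as_matrix()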

--
319686183  by vighneshb:

    Require tpu_name when use_tpu is set.

--
319613327  by aom:

    EfficientDet-style Data Augmentation.

--
319572180  by rathodv:

    Add TF2 SSD FPN (a.k.a. RetinaNet) configs.

--
319553823  by rathodv:

    Internal Change.

--

PiperOrigin-RevId: 320117767
Co-authored-by: TF Object Detection Team <no-reply@google.com>
parent 3b56ba8d
......@@ -17,9 +17,8 @@
"""Tests for box_predictor_builder."""
import unittest
import mock
from unittest import mock # pylint: disable=g-importing-member
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
......
......@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for graph_rewriter_builder."""
import unittest
import mock
from unittest import mock # pylint: disable=g-importing-member
import tensorflow.compat.v1 as tf
import tf_slim as slim
......
......@@ -417,4 +417,12 @@ def build(preprocessor_step_config):
'num_scales': config.num_scales
}
if step_type == 'random_scale_crop_and_pad_to_square':
config = preprocessor_step_config.random_scale_crop_and_pad_to_square
return preprocessor.random_scale_crop_and_pad_to_square, {
'scale_min': config.scale_min,
'scale_max': config.scale_max,
'output_size': config.output_size,
}
raise ValueError('Unknown preprocessing step.')
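
# Hedged sketch of exercising the builder above with a text-format proto.
# The message and field names mirror the keys this builder reads; the rest
# is illustrative.
from google.protobuf import text_format
from object_detection.builders import preprocessor_builder
from object_detection.protos import preprocessor_pb2

step = text_format.Parse(
    """
    random_scale_crop_and_pad_to_square {
      scale_min: 0.1
      scale_max: 2.0
      output_size: 512
    }
    """, preprocessor_pb2.PreprocessingStep())
fn, kwargs = preprocessor_builder.build(step)  # -> (function, keyword args)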
......@@ -42,9 +42,6 @@ PART_NAMES = [
b'left_face',
]
_SRC_PATH = ('google3/third_party/tensorflow_models/object_detection/'
'dataset_tools/densepose')
def scale(dp_surface_coords, y_scale, x_scale, scope=None):
"""Scales DensePose coordinates in y and x dimensions.
......@@ -266,10 +263,14 @@ class DensePoseHorizontalFlip(object):
def __init__(self):
"""Constructor."""
uv_symmetry_transforms_path = os.path.join(
tf.resource_loader.get_data_files_path(), '..', 'dataset_tools',
'densepose', 'UV_symmetry_transforms.mat')
data = scipy.io.loadmat(uv_symmetry_transforms_path)
path = os.path.dirname(os.path.abspath(__file__))
uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
os.path.join(path, '..', 'dataset_tools', 'densepose',
'UV_symmetry_transforms.mat'))
tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
uv_symmetry_transforms_path))
with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
data = scipy.io.loadmat(f)
# Create lookup maps which indicate how a VU coordinate changes after a
# horizontal flip.
......
......@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities} or
        keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
......@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities} or
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
......@@ -288,19 +288,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
def provide_groundtruth(self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
def provide_groundtruth(
self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
groundtruth_dp_part_ids_list=None,
groundtruth_dp_surface_coords_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
"""Provide groundtruth tensors.
Args:
......@@ -324,6 +328,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
`groundtruth_keypoint_visibilities_list`).
groundtruth_keypoint_visibilities_list: a list of 3-D tf.bool tensors
of shape [num_boxes, num_keypoints] containing keypoint visibilities.
groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
[num_boxes] containing the number of DensePose sampled points.
groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding.
groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, max_sampled_points, 4] containing the DensePose
surface coordinates for each sampled point. Note that there may be
padding.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
......@@ -361,6 +374,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.BoxListFields.keypoint_visibilities] = (
groundtruth_keypoint_visibilities_list)
if groundtruth_dp_num_points_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_num_points] = (
groundtruth_dp_num_points_list)
if groundtruth_dp_part_ids_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_part_ids] = (
groundtruth_dp_part_ids_list)
if groundtruth_dp_surface_coords_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_surface_coords] = (
groundtruth_dp_surface_coords_list)
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
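
# Hedged usage sketch for the new DensePose groundtruth arguments. Shapes
# follow the Args documented above; `model`, `boxes` and `classes` are
# assumed to exist. One image, two boxes, padded to max_sampled_points=3.
dp_num_points = [tf.constant([3, 1], dtype=tf.int32)]
dp_part_ids = [tf.constant([[0, 5, 5], [12, 0, 0]], dtype=tf.int32)]
dp_surface_coords = [tf.zeros([2, 3, 4], dtype=tf.float32)]
model.provide_groundtruth(
    groundtruth_boxes_list=[boxes],
    groundtruth_classes_list=[classes],
    groundtruth_dp_num_points_list=dp_num_points,
    groundtruth_dp_part_ids_list=dp_part_ids,
    groundtruth_dp_surface_coords_list=dp_surface_coords)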
......
......@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width,channels].
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
......@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
return return_values
def random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=None,
keypoints=None,
scale_min=0.1,
scale_max=2.0,
output_size=512,
resize_method=tf.image.ResizeMethod.BILINEAR,
seed=None):
"""Randomly scale, crop, and then pad an image to fixed square dimensions.
Randomly scale, crop, and then pad an image to the desired square output
dimensions. Specifically, this method first samples a random_scale factor
from a uniform distribution between scale_min and scale_max, and then resizes
  the image such that its maximum dimension is (output_size * random_scale).
Secondly, a square output_size crop is extracted from the resized image
(note, this will only occur when random_scale > 1.0). Lastly, the cropped
region is padded to the desired square output_size, by filling with zeros.
The augmentation is borrowed from [1]
[1]: https://arxiv.org/abs/1911.09070
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
are in normalized form meaning their coordinates vary between [0, 1]. Each
row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop boundary
are clipped to the boundary and boxes falling outside the crop are
ignored.
labels: rank 1 int32 tensor containing the object classes.
label_weights: float32 tensor of shape [num_instances] representing the
weight for each box.
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks. The masks are of the same height, width
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
scale_min: float, the minimum value for the random scale factor.
scale_max: float, the maximum value for the random scale factor.
output_size: int, the desired (square) output image size.
resize_method: tf.image.ResizeMethod, resize method to use when scaling the
input images.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
label_weights: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
"""
img_shape = tf.shape(image)
input_height, input_width = img_shape[0], img_shape[1]
random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
# Compute the scaled height and width from the random scale.
max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
# Compute the offsets:
offset_y = tf.cast(scaled_height - output_size, tf.float32)
offset_x = tf.cast(scaled_width - output_size, tf.float32)
offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
offset_y = tf.cast(offset_y, tf.int32)
offset_x = tf.cast(offset_x, tf.int32)
# Scale, crop, and pad the input image.
scaled_image = tf.image.resize_images(
image, [scaled_height, scaled_width], method=resize_method)
scaled_image = scaled_image[offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
output_size)
# Update the boxes.
new_window = tf.cast(
tf.stack([offset_y, offset_x,
offset_y + output_size, offset_x + output_size]),
dtype=tf.float32)
new_window /= tf.cast(
tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
dtype=tf.float32)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
boxlist, indices = box_list_ops.prune_completely_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
boxlist = box_list_ops.clip_to_window(
boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
return_values = [output_image, boxlist.get(),
tf.gather(labels, indices),
tf.gather(label_weights, indices)]
if masks is not None:
new_masks = tf.expand_dims(masks, -1)
new_masks = tf.image.resize_images(
new_masks, [scaled_height, scaled_width], method=resize_method)
new_masks = new_masks[:, offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, output_size, output_size)
new_masks = tf.squeeze(new_masks, [-1])
return_values.append(tf.gather(new_masks, indices))
if keypoints is not None:
keypoints = tf.gather(keypoints, indices)
keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
keypoints = keypoint_ops.prune_outside_window(
keypoints, [0.0, 0.0, 1.0, 1.0])
return_values.append(keypoints)
return return_values
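
# Hedged usage sketch (TF1-compat graph mode). The function is defined in
# this file; the tensors below are illustrative.
image = tf.random_uniform([384, 512, 3])
boxes = tf.constant([[0.2, 0.2, 0.4, 0.4]])
labels = tf.constant([1], dtype=tf.int32)
weights = tf.constant([1.0])
(out_image, out_boxes, out_labels,
 out_weights) = random_scale_crop_and_pad_to_square(
     image, boxes, labels, weights,
     scale_min=0.5, scale_max=1.5, output_size=512)
# out_image is always [512, 512, 3]; out_boxes are renormalized to the crop
# window, and boxes falling completely outside the crop are pruned.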
def get_default_func_arg_map(include_label_weights=True,
include_label_confidences=False,
include_multiclass_scores=False,
......@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights,
groundtruth_label_confidences, multiclass_scores,
groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities,
groundtruth_dp_num_points, groundtruth_dp_part_ids,
groundtruth_dp_surface_coords),
random_crop_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_label_confidences,
multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
......@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
random_scale_crop_and_pad_to_square:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
}
return prep_func_arg_map
......
......@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_masks=True,
test_keypoints=True)
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
def graph_fn():
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints, keypoint_visibilities = self.createTestKeypoints()
dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image:
images,
fields.InputDataFields.groundtruth_boxes:
boxes,
fields.InputDataFields.groundtruth_instance_masks:
masks,
fields.InputDataFields.groundtruth_keypoints:
keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities:
keypoint_visibilities
}
if include_dense_pose:
tensor_dict.update({
fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords:
dp_surface_coords
})
preprocess_options = [(preprocessor.random_horizontal_flip, {
'keypoint_flip_permutation': keypoint_flip_permutation
})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True,
include_dense_pose=include_dense_pose)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
keypoint_visibilities = tensor_dict[
fields.InputDataFields.groundtruth_keypoint_visibilities]
output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
if include_dense_pose:
dp_num_points = tensor_dict[
fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = tensor_dict[
fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = tensor_dict[
fields.InputDataFields.groundtruth_dp_surface_coords]
output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
return output_tensors
output_tensors = self.execute_cpu(graph_fn, [])
self.assertIsNotNone(output_tensors[0]) # Boxes.
self.assertIsNotNone(output_tensors[1]) # Masks.
self.assertIsNotNone(output_tensors[2]) # Keypoints
self.assertIsNotNone(output_tensors[3]) # Keypoint Visibilities.
if include_dense_pose:
self.assertIsNotNone(output_tensors[4]) # DensePose Num Points.
self.assertIsNotNone(output_tensors[5]) # DensePose Part IDs.
self.assertIsNotNone(output_tensors[6]) # DensePose Surface Coords
def testRandomVerticalFlip(self):
......@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
def graph_fn():
......@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
size = max(image.shape)
self.assertAlmostEqual(scale * 256.0, size)
self.assertAllClose(image[:, :, 0], masks[0, :, :])
@parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
('scale_2_0', 2.0))
def test_random_scale_crop_and_pad_to_square(self, scale):
def graph_fn():
image = np.random.randn(512, 256, 1)
box_centers = [0.25, 0.5, 0.75]
box_size = 0.1
box_corners = []
box_labels = []
box_label_weights = []
keypoints = []
masks = []
for center_y in box_centers:
for center_x in box_centers:
box_corners.append(
[center_y - box_size / 2.0, center_x - box_size / 2.0,
center_y + box_size / 2.0, center_x + box_size / 2.0])
box_labels.append([1])
box_label_weights.append([1.])
keypoints.append(
[[center_y - box_size / 2.0, center_x - box_size / 2.0],
[center_y + box_size / 2.0, center_x + box_size / 2.0]])
masks.append(image[:, :, 0].reshape(512, 256))
image = tf.constant(image)
boxes = tf.constant(box_corners)
labels = tf.constant(box_labels)
label_weights = tf.constant(box_label_weights)
keypoints = tf.constant(keypoints)
masks = tf.constant(np.stack(masks))
(new_image, new_boxes, _, _, new_masks,
new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=masks,
keypoints=keypoints,
scale_min=scale,
scale_max=scale,
output_size=512)
return new_image, new_boxes, new_masks, new_keypoints
image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
# Since random_scale_crop_and_pad_to_square may prune and clip boxes,
# we only need to find one of the boxes that was not clipped and check
# that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
# is equivalent to round(a-b, 7) == 0.
any_box_has_correct_size = False
effective_scale_y = int(scale * 512) / 512.0
effective_scale_x = int(scale * 256) / 512.0
expected_size_y = 0.1 * effective_scale_y
expected_size_x = 0.1 * effective_scale_x
for box in boxes:
ymin, xmin, ymax, xmax = box
any_box_has_correct_size |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_box_has_correct_size)
# Similar to the approach above where we check for at least one box with the
# expected dimensions, we check for at least one pair of keypoints whose
# distance matches the expected dimensions.
any_keypoint_pair_has_correct_dist = False
for keypoint_pair in keypoints:
ymin, xmin = keypoint_pair[0]
ymax, xmax = keypoint_pair[1]
any_keypoint_pair_has_correct_dist |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_keypoint_pair_has_correct_dist)
self.assertAlmostEqual(512.0, image.shape[0])
self.assertAlmostEqual(512.0, image.shape[1])
self.assertAllClose(image[:, :, 0],
masks[0, :, :])
......
......@@ -141,6 +141,8 @@ class DetectionResultFields(object):
for detection boxes in the image including background class.
detection_classes: detection-level class labels.
detection_masks: contains a segmentation mask for each detection box.
detection_surface_coords: contains DensePose surface coordinates for each
box.
detection_boundaries: contains an object boundary for each detection box.
detection_keypoints: contains detection keypoints for each detection box.
detection_keypoint_scores: contains detection keypoint scores.
......@@ -161,6 +163,7 @@ class DetectionResultFields(object):
detection_features = 'detection_features'
detection_classes = 'detection_classes'
detection_masks = 'detection_masks'
detection_surface_coords = 'detection_surface_coords'
detection_boundaries = 'detection_boundaries'
detection_keypoints = 'detection_keypoints'
detection_keypoint_scores = 'detection_keypoint_scores'
......@@ -182,7 +185,11 @@ class BoxListFields(object):
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoint_visibilities: keypoint visibilities per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
densepose_num_points: number of DensePose points per bounding box.
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
is_crowd: is_crowd annotation per bounding box.
"""
boxes = 'boxes'
......@@ -196,6 +203,9 @@ class BoxListFields(object):
keypoints = 'keypoints'
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
densepose_num_points = 'densepose_num_points'
densepose_part_ids = 'densepose_part_ids'
densepose_surface_coords = 'densepose_surface_coords'
is_crowd = 'is_crowd'
group_of = 'group_of'
......
......@@ -50,13 +50,16 @@ import io
import itertools
import json
import os
import apache_beam as beam
import numpy as np
import PIL.Image
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class ReKeyDataFn(beam.DoFn):
"""Re-keys tfrecords by sequence_key.
......
......@@ -22,7 +22,7 @@ import datetime
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -31,6 +31,12 @@ from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
......
......@@ -39,12 +39,16 @@ import io
import json
import logging
import os
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from object_detection.utils import dataset_util
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
......
......@@ -22,7 +22,6 @@ import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
from PIL import Image
......@@ -30,6 +29,11 @@ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
......
......@@ -48,8 +48,11 @@ from __future__ import print_function
import argparse
import os
import threading
import apache_beam as beam
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateDetectionDataFn(beam.DoFn):
......
......@@ -22,7 +22,6 @@ import contextlib
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -39,6 +38,11 @@ if six.PY2:
else:
mock = unittest.mock
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
......
......@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
--output_directory path/to/exported_model_directory \
--additional_output_tensor_names detection_features
python generate_embedding_data.py \
--alsologtostderr \
......@@ -52,11 +53,15 @@ import datetime
import os
import threading
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
......
......@@ -21,7 +21,6 @@ import contextlib
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -38,6 +37,11 @@ if six.PY2:
else:
mock = unittest.mock
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
......
......@@ -51,25 +51,25 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.as_matrix()),
filtered_data_frame_boxes.YMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.as_matrix()),
filtered_data_frame_boxes.XMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.as_matrix()),
filtered_data_frame_boxes.YMax.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.as_matrix()),
filtered_data_frame_boxes.XMax.to_numpy()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature([
six.ensure_binary(label_text)
for label_text in filtered_data_frame_boxes.LabelName.as_matrix()
for label_text in filtered_data_frame_boxes.LabelName.to_numpy()
]),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame_boxes.LabelName.map(
lambda x: label_map[x]).as_matrix()),
lambda x: label_map[x]).to_numpy()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature(
six.ensure_binary('{}.jpg'.format(image_id))),
......@@ -82,31 +82,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
filtered_data_frame_boxes.IsGroupOf.to_numpy().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
filtered_data_frame_boxes.IsOccluded.to_numpy().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
filtered_data_frame_boxes.IsTruncated.to_numpy().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
filtered_data_frame_boxes.IsDepiction.to_numpy().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).as_matrix())
lambda x: label_map[x]).to_numpy())
feature_map[standard_fields.TfExampleFields
.image_class_text] = dataset_util.bytes_list_feature([
six.ensure_binary(label_text) for label_text in
filtered_data_frame_labels.LabelName.as_matrix()
filtered_data_frame_labels.LabelName.to_numpy()
]),
return tf.train.Example(features=tf.train.Features(feature=feature_map))
......@@ -552,7 +552,11 @@ def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# If the masks are currently float, binarize them. Otherwise keep them as
# integers, since they have already been thresholded.
if detection_masks_reframed.dtype == tf.float32:
detection_masks_reframed = tf.greater(detection_masks_reframed, 0.5)
return tf.cast(detection_masks_reframed, tf.uint8)
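
# The dtype guard above means float mask probabilities get binarized at 0.5,
# while already-thresholded masks (e.g. uint8 part labels) pass through
# unchanged. Illustrative values:
float_masks = tf.constant([[0.2, 0.7]])              # -> [[0, 1]] after cast
uint8_masks = tf.constant([[0, 3]], dtype=tf.uint8)  # kept as-is: [[0, 3]]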
def _resize_groundtruth_masks(args):
......@@ -570,6 +574,17 @@ def _resize_groundtruth_masks(args):
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
def _resize_surface_coordinate_masks(args):
detection_boxes, surface_coords, image_shape = args
surface_coords_v, surface_coords_u = tf.unstack(surface_coords, axis=-1)
surface_coords_v_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_v, detection_boxes, image_shape[0], image_shape[1])
surface_coords_u_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_u, detection_boxes, image_shape[0], image_shape[1])
return tf.stack([surface_coords_v_reframed, surface_coords_u_reframed],
axis=-1)
def _scale_keypoint_to_absolute(args):
keypoints, image_shape = args
return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
......@@ -720,6 +735,12 @@ def result_dict_for_batched_example(images,
num_keypoints] bool tensor with keypoint visibilities (Optional).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
tensor of 1-indexed classes. (Optional)
'groundtruth_dp_num_points': [batch_size, max_number_of_boxes] int32
tensor. (Optional)
'groundtruth_dp_part_ids': [batch_size, max_number_of_boxes,
max_sampled_points] int32 tensor. (Optional)
      'groundtruth_dp_surface_coords': [batch_size, max_number_of_boxes,
max_sampled_points, 4] float32 tensor. (Optional)
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
......@@ -747,12 +768,16 @@ def result_dict_for_batched_example(images,
'detection_scores': [batch_size, max_detections] float32 tensor of scores.
'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
classes.
'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
binarized masks, reframed to full image masks. (Optional)
'detection_masks': [batch_size, max_detections, H, W] uint8 tensor of
instance masks, reframed to full image masks. Note that these may be
binarized (e.g. {0, 1}), or may contain 1-indexed part labels. (Optional)
'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
float32 tensor containing keypoint coordinates. (Optional)
'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
float32 tensor containing keypoint scores. (Optional)
'detection_surface_coords': [batch_size, max_detection, H, W, 2] float32
tensor with normalized surface coordinates (e.g. DensePose UV
coordinates). (Optional)
'num_detections': [batch_size] int64 tensor containing number of valid
detections.
'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
......@@ -844,14 +869,21 @@ def result_dict_for_batched_example(images,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks]
# TODO(rathodv): This should be done in model's postprocess
# function ideally.
output_dict[detection_fields.detection_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks,
original_image_spatial_shapes],
dtype=tf.uint8))
if detection_fields.detection_surface_coords in detections:
detection_surface_coords = detections[
detection_fields.detection_surface_coords]
output_dict[detection_fields.detection_surface_coords] = (
shape_utils.static_or_dynamic_map_fn(
_resize_surface_coordinate_masks,
elems=[detection_boxes, detection_surface_coords,
original_image_spatial_shapes],
dtype=tf.float32))
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints]
......@@ -1074,3 +1106,8 @@ def evaluator_options_from_eval_config(eval_config):
'recall_upper_bound': (eval_config.recall_upper_bound)
}
return evaluator_options
def has_densepose(eval_dict):
return (fields.DetectionResultFields.detection_masks in eval_dict and
fields.DetectionResultFields.detection_surface_coords in eval_dict)
......@@ -924,13 +924,16 @@ def convert_strided_predictions_to_normalized_keypoints(
def convert_strided_predictions_to_instance_masks(
boxes, classes, masks, stride, mask_height, mask_width,
true_image_shapes, score_threshold=0.5):
boxes, classes, masks, true_image_shapes,
densepose_part_heatmap=None, densepose_surface_coords=None, stride=4,
mask_height=256, mask_width=256, score_threshold=0.5,
densepose_class_index=-1):
"""Converts predicted full-image masks into instance masks.
For each predicted detection box:
* Crop and resize the predicted mask based on the detected bounding box
coordinates and class prediction. Uses bilinear resampling.
* Crop and resize the predicted mask (and optionally DensePose coordinates)
based on the detected bounding box coordinates and class prediction. Uses
bilinear resampling.
* Binarize the mask using the provided score threshold.
Args:
......@@ -940,57 +943,212 @@ def convert_strided_predictions_to_instance_masks(
detected class for each box (0-indexed).
masks: A [batch, output_height, output_width, num_classes] float32
tensor with class probabilities.
true_image_shapes: A tensor of shape [batch, 3] representing the true
shape of the inputs not considering padding.
densepose_part_heatmap: (Optional) A [batch, output_height, output_width,
num_parts] float32 tensor with part scores (i.e. logits).
densepose_surface_coords: (Optional) A [batch, output_height, output_width,
2 * num_parts] float32 tensor with predicted part coordinates (in
vu-format).
stride: The stride in the output space.
mask_height: The desired resized height for instance masks.
mask_width: The desired resized width for instance masks.
true_image_shapes: A tensor of shape [batch, 3] representing the true
shape of the inputs not considering padding.
score_threshold: The threshold at which to convert predicted mask
into foreground pixels.
densepose_class_index: The class index (0-indexed) corresponding to the
class which has DensePose labels (e.g. person class).
Returns:
A [batch_size, max_detections, mask_height, mask_width] uint8 tensor with
predicted foreground mask for each instance. The masks take values in
{0, 1}.
A tuple of masks and surface_coords.
instance_masks: A [batch_size, max_detections, mask_height, mask_width]
uint8 tensor with predicted foreground mask for each
instance. If DensePose tensors are provided, then each pixel value in the
mask encodes the 1-indexed part.
surface_coords: A [batch_size, max_detections, mask_height, mask_width, 2]
float32 tensor with (v, u) coordinates. Note that v, u coordinates are
only defined on instance masks, and the coordinates at each location of
the foreground mask correspond to coordinates on a local part coordinate
system (the specific part can be inferred from the `instance_masks`
      output). If DensePose feature maps are not passed to this function, this
output will be None.
Raises:
ValueError: If one but not both of `densepose_part_heatmap` and
`densepose_surface_coords` is provided.
"""
_, output_height, output_width, _ = (
batch_size, output_height, output_width, _ = (
shape_utils.combined_static_and_dynamic_shape(masks))
input_height = stride * output_height
input_width = stride * output_width
true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
# If necessary, create dummy DensePose tensors to simplify the map function.
densepose_present = True
if ((densepose_part_heatmap is not None) ^
(densepose_surface_coords is not None)):
raise ValueError('To use DensePose, both `densepose_part_heatmap` and '
'`densepose_surface_coords` must be provided')
if densepose_part_heatmap is None and densepose_surface_coords is None:
densepose_present = False
densepose_part_heatmap = tf.zeros(
(batch_size, output_height, output_width, 1), dtype=tf.float32)
densepose_surface_coords = tf.zeros(
(batch_size, output_height, output_width, 2), dtype=tf.float32)
crop_and_threshold_fn = functools.partial(
crop_and_threshold_masks, input_height=input_height,
input_width=input_width, mask_height=mask_height, mask_width=mask_width,
score_threshold=score_threshold,
densepose_class_index=densepose_class_index)
instance_masks, surface_coords = shape_utils.static_or_dynamic_map_fn(
crop_and_threshold_fn,
elems=[boxes, classes, masks, densepose_part_heatmap,
densepose_surface_coords, true_heights, true_widths],
dtype=[tf.uint8, tf.float32],
back_prop=False)
surface_coords = surface_coords if densepose_present else None
return instance_masks, surface_coords
def crop_and_threshold_masks(elems, input_height, input_width, mask_height=256,
mask_width=256, score_threshold=0.5,
densepose_class_index=-1):
"""Crops and thresholds masks based on detection boxes.
Args:
elems: A tuple of
boxes - float32 tensor of shape [max_detections, 4]
classes - int32 tensor of shape [max_detections] (0-indexed)
masks - float32 tensor of shape [output_height, output_width, num_classes]
part_heatmap - float32 tensor of shape [output_height, output_width,
num_parts]
surf_coords - float32 tensor of shape [output_height, output_width,
2 * num_parts]
true_height - scalar int tensor
true_width - scalar int tensor
input_height: Input height to network.
input_width: Input width to network.
mask_height: Height for resizing mask crops.
mask_width: Width for resizing mask crops.
score_threshold: The threshold at which to convert predicted mask
into foreground pixels.
densepose_class_index: scalar int tensor with the class index (0-indexed)
for DensePose.
Returns:
A tuple of
all_instances: A [max_detections, mask_height, mask_width] uint8 tensor
with a predicted foreground mask for each instance. Background is encoded
as 0, and foreground is encoded as a positive integer. Specific part
indices are encoded as 1-indexed parts (for classes that have part
information).
surface_coords: A [max_detections, mask_height, mask_width, 2]
      float32 tensor with (v, u) coordinates for each part.
"""
(boxes, classes, masks, part_heatmap, surf_coords, true_height,
true_width) = elems
# Boxes are in normalized coordinates relative to true image shapes. Convert
# coordinates to be normalized relative to input image shapes (since masks
# may still have padding).
# Then crop and resize each mask.
def crop_and_threshold_masks(args):
"""Crops masks based on detection boxes."""
boxes, classes, masks, true_height, true_width = args
boxlist = box_list.BoxList(boxes)
y_scale = true_height / input_height
x_scale = true_width / input_width
boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
boxes = boxlist.get()
# Convert masks from [input_height, input_width, num_classes] to
# [num_classes, input_height, input_width, 1].
masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
cropped_masks = tf2.image.crop_and_resize(
masks_4d,
boxes=boxes,
box_indices=classes,
crop_size=[mask_height, mask_width],
method='bilinear')
masks_3d = tf.squeeze(cropped_masks, axis=3)
masks_binarized = tf.math.greater_equal(masks_3d, score_threshold)
return tf.cast(masks_binarized, tf.uint8)
boxlist = box_list.BoxList(boxes)
y_scale = true_height / input_height
x_scale = true_width / input_width
boxlist = box_list_ops.scale(boxlist, y_scale, x_scale)
boxes = boxlist.get()
# Convert masks from [output_height, output_width, num_classes] to
# [num_classes, output_height, output_width, 1].
num_classes = tf.shape(masks)[-1]
masks_4d = tf.transpose(masks, perm=[2, 0, 1])[:, :, :, tf.newaxis]
# Tile part and surface coordinate masks for all classes.
part_heatmap_4d = tf.tile(part_heatmap[tf.newaxis, :, :, :],
multiples=[num_classes, 1, 1, 1])
surf_coords_4d = tf.tile(surf_coords[tf.newaxis, :, :, :],
multiples=[num_classes, 1, 1, 1])
feature_maps_concat = tf.concat([masks_4d, part_heatmap_4d, surf_coords_4d],
axis=-1)
# The following tensor has shape
# [max_detections, mask_height, mask_width, 1 + 3 * num_parts].
cropped_masks = tf2.image.crop_and_resize(
feature_maps_concat,
boxes=boxes,
box_indices=classes,
crop_size=[mask_height, mask_width],
method='bilinear')
# Split the cropped masks back into instance masks, part masks, and surface
# coordinates.
num_parts = tf.shape(part_heatmap)[-1]
instance_masks, part_heatmap_cropped, surface_coords_cropped = tf.split(
cropped_masks, [1, num_parts, 2 * num_parts], axis=-1)
# Threshold the instance masks. Resulting tensor has shape
# [max_detections, mask_height, mask_width, 1].
instance_masks_int = tf.cast(
tf.math.greater_equal(instance_masks, score_threshold), dtype=tf.int32)
# Produce a binary mask that is 1.0 only:
# - in the foreground region for an instance
# - in detections corresponding to the DensePose class
det_with_parts = tf.equal(classes, densepose_class_index)
det_with_parts = tf.cast(
tf.reshape(det_with_parts, [-1, 1, 1, 1]), dtype=tf.int32)
instance_masks_with_parts = tf.math.multiply(instance_masks_int,
det_with_parts)
# Similarly, produce a binary mask that holds the foreground masks only for
# instances without parts (i.e. non-DensePose classes).
det_without_parts = 1 - det_with_parts
instance_masks_without_parts = tf.math.multiply(instance_masks_int,
det_without_parts)
# Assemble a tensor that has standard instance segmentation masks for
# non-DensePose classes (with values in [0, 1]), and part segmentation masks
  # for DensePose classes (with values in [0, 1, ..., num_parts]).
part_mask_int_zero_indexed = tf.math.argmax(
part_heatmap_cropped, axis=-1, output_type=tf.int32)[:, :, :, tf.newaxis]
part_mask_int_one_indexed = part_mask_int_zero_indexed + 1
all_instances = (instance_masks_without_parts +
instance_masks_with_parts * part_mask_int_one_indexed)
# Gather the surface coordinates for the parts.
surface_coords_cropped = tf.reshape(
surface_coords_cropped, [-1, mask_height, mask_width, num_parts, 2])
surface_coords = gather_surface_coords_for_parts(surface_coords_cropped,
part_mask_int_zero_indexed)
surface_coords = (
surface_coords * tf.cast(instance_masks_with_parts, tf.float32))
return [tf.squeeze(all_instances, axis=3), surface_coords]
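
# NumPy sanity check of the part-label encoding above (hypothetical values):
# background stays 0, foreground pixels of DensePose-class detections carry
# 1-indexed part ids, and other detections stay binary.
import numpy as np
instance_fg = np.array([[1, 1], [0, 1]])           # thresholded foreground
det_has_parts = 1                                  # DensePose-class detection
part_ids_one_indexed = np.array([[3, 5], [2, 7]])
with_parts = instance_fg * det_has_parts
without_parts = instance_fg * (1 - det_has_parts)
encoded = without_parts + with_parts * part_ids_one_indexed
# encoded == [[3, 5], [0, 7]]; with det_has_parts == 0 it would stay binary.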
def gather_surface_coords_for_parts(surface_coords_cropped,
highest_scoring_part):
"""Gathers the (v, u) coordinates for the highest scoring DensePose parts.
true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1)
masks_for_image = shape_utils.static_or_dynamic_map_fn(
crop_and_threshold_masks,
elems=[boxes, classes, masks, true_heights, true_widths],
dtype=tf.uint8,
back_prop=False)
masks = tf.stack(masks_for_image, axis=0)
return masks
Args:
surface_coords_cropped: A [max_detections, height, width, num_parts, 2]
float32 tensor with (v, u) surface coordinates.
highest_scoring_part: A [max_detections, height, width] integer tensor with
the highest scoring part (0-indexed) indices for each location.
Returns:
A [max_detections, height, width, 2] float32 tensor with the (v, u)
coordinates selected from the highest scoring parts.
"""
max_detections, height, width, num_parts, _ = (
shape_utils.combined_static_and_dynamic_shape(surface_coords_cropped))
flattened_surface_coords = tf.reshape(surface_coords_cropped, [-1, 2])
flattened_part_ids = tf.reshape(highest_scoring_part, [-1])
# Produce lookup indices that represent the locations of the highest scoring
# parts in the `flattened_surface_coords` tensor.
flattened_lookup_indices = (
num_parts * tf.range(max_detections * height * width) +
flattened_part_ids)
vu_coords_flattened = tf.gather(flattened_surface_coords,
flattened_lookup_indices, axis=0)
return tf.reshape(vu_coords_flattened, [max_detections, height, width, 2])
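
# Toy check of the flattened lookup arithmetic (2 locations, 3 parts,
# illustrative values): rows of `coords` are ordered (location, part), so
# part p at location i lives at row 3 * i + p.
import numpy as np
coords = np.arange(2 * 3 * 2).reshape(2 * 3, 2)
part_ids = np.array([2, 0])           # winning part per location
lookup = 3 * np.arange(2) + part_ids  # -> [2, 3]
picked = coords[lookup]               # -> [[4, 5], [6, 7]]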
class ObjectDetectionParams(
......@@ -1235,6 +1393,64 @@ class MaskParams(
score_threshold, heatmap_bias_init)
class DensePoseParams(
collections.namedtuple('DensePoseParams', [
'class_id', 'classification_loss', 'localization_loss',
'part_loss_weight', 'coordinate_loss_weight', 'num_parts',
'task_loss_weight', 'upsample_to_input_res', 'upsample_method',
'heatmap_bias_init'
])):
"""Namedtuple to store DensePose prediction related parameters."""
__slots__ = ()
def __new__(cls,
class_id,
classification_loss,
localization_loss,
part_loss_weight=1.0,
coordinate_loss_weight=1.0,
num_parts=24,
task_loss_weight=1.0,
upsample_to_input_res=True,
upsample_method='bilinear',
heatmap_bias_init=-2.19):
"""Constructor with default values for DensePoseParams.
Args:
class_id: the ID of the class that contains the DensePose groundtruth.
This should typically correspond to the "person" class. Note that the ID
is 0-based, meaning that class 0 corresponds to the first non-background
object class.
classification_loss: an object_detection.core.losses.Loss object to
compute the loss for the body part predictions in CenterNet.
localization_loss: an object_detection.core.losses.Loss object to compute
the loss for the surface coordinate regression in CenterNet.
part_loss_weight: The loss weight to apply to part prediction.
coordinate_loss_weight: The loss weight to apply to surface coordinate
prediction.
num_parts: The number of DensePose parts to predict.
task_loss_weight: float, the loss weight for the DensePose task.
upsample_to_input_res: Whether to upsample the DensePose feature maps to
the input resolution before applying loss. Note that the prediction
outputs are still at the standard CenterNet output stride.
upsample_method: Method for upsampling DensePose feature maps. Options are
        either 'bilinear' or 'nearest'. This has no effect when
`upsample_to_input_res` is False.
heatmap_bias_init: float, the initial value of bias in the convolutional
kernel of the part prediction head. If set to None, the
bias is initialized with zeros.
Returns:
An initialized DensePoseParams namedtuple.
"""
return super(DensePoseParams,
cls).__new__(cls, class_id, classification_loss,
localization_loss, part_loss_weight,
coordinate_loss_weight, num_parts,
task_loss_weight, upsample_to_input_res,
upsample_method, heatmap_bias_init)
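
# Hedged construction example. The specific loss objects are assumptions;
# per the Args above, any object_detection.core.losses.Loss should work.
from object_detection.core import losses

densepose_params = DensePoseParams(
    class_id=0,  # assuming "person" is the first non-background class
    classification_loss=losses.WeightedSoftmaxClassificationLoss(),
    localization_loss=losses.L1LocalizationLoss(),
    num_parts=24,
    upsample_to_input_res=True)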
# The following constants are used to generate the keys of the
# (prediction, loss, target assigner,...) dictionaries used in CenterNetMetaArch
# class.
......@@ -1247,6 +1463,9 @@ KEYPOINT_HEATMAP = 'keypoint/heatmap'
KEYPOINT_OFFSET = 'keypoint/offset'
SEGMENTATION_TASK = 'segmentation_task'
SEGMENTATION_HEATMAP = 'segmentation/heatmap'
DENSEPOSE_TASK = 'densepose_task'
DENSEPOSE_HEATMAP = 'densepose/heatmap'
DENSEPOSE_REGRESSION = 'densepose/regression'
LOSS_KEY_PREFIX = 'Loss'
......@@ -1290,7 +1509,8 @@ class CenterNetMetaArch(model.DetectionModel):
object_center_params,
object_detection_params=None,
keypoint_params_dict=None,
mask_params=None):
mask_params=None,
densepose_params=None):
"""Initializes a CenterNet model.
Args:
......@@ -1318,6 +1538,10 @@ class CenterNetMetaArch(model.DetectionModel):
mask_params: A MaskParams namedtuple. This object
holds the hyper-parameters for segmentation. Please see the class
definition for more details.
densepose_params: A DensePoseParams namedtuple. This object holds the
hyper-parameters for DensePose prediction. Please see the class
definition for more details. Note that if this is provided, it is
expected that `mask_params` is also provided.
"""
assert object_detection_params or keypoint_params_dict
# Shorten the name for convenience and better formatting.
......@@ -1333,6 +1557,10 @@ class CenterNetMetaArch(model.DetectionModel):
self._od_params = object_detection_params
self._kp_params_dict = keypoint_params_dict
self._mask_params = mask_params
if densepose_params is not None and mask_params is None:
raise ValueError('To run DensePose prediction, `mask_params` must also '
'be supplied.')
self._densepose_params = densepose_params
# Construct the prediction head nets.
self._prediction_head_dict = self._construct_prediction_heads(
......@@ -1413,8 +1641,18 @@ class CenterNetMetaArch(model.DetectionModel):
if self._mask_params is not None:
prediction_heads[SEGMENTATION_HEATMAP] = [
make_prediction_net(num_classes,
bias_fill=class_prediction_bias_init)
bias_fill=self._mask_params.heatmap_bias_init)
for _ in range(num_feature_outputs)]
if self._densepose_params is not None:
prediction_heads[DENSEPOSE_HEATMAP] = [
make_prediction_net( # pylint: disable=g-complex-comprehension
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init)
for _ in range(num_feature_outputs)]
prediction_heads[DENSEPOSE_REGRESSION] = [
make_prediction_net(2 * self._densepose_params.num_parts)
for _ in range(num_feature_outputs)
]
return prediction_heads
def _initialize_target_assigners(self, stride, min_box_overlap_iou):
......@@ -1449,6 +1687,10 @@ class CenterNetMetaArch(model.DetectionModel):
if self._mask_params is not None:
target_assigners[SEGMENTATION_TASK] = (
cn_assigner.CenterNetMaskTargetAssigner(stride))
if self._densepose_params is not None:
dp_stride = 1 if self._densepose_params.upsample_to_input_res else stride
target_assigners[DENSEPOSE_TASK] = (
cn_assigner.CenterNetDensePoseTargetAssigner(dp_stride))
return target_assigners
......@@ -1860,6 +2102,113 @@ class CenterNetMetaArch(model.DetectionModel):
float(len(segmentation_predictions)) * total_pixels_in_loss)
return total_loss
def _compute_densepose_losses(self, input_height, input_width,
prediction_dict):
"""Computes the weighted DensePose losses.
Args:
input_height: An integer scalar tensor representing input image height.
input_width: An integer scalar tensor representing input image width.
prediction_dict: A dictionary holding predicted tensors output by the
"predict" function. See the "predict" function for more detailed
description.
Returns:
A dictionary of scalar float tensors representing the weighted losses for
the DensePose task:
DENSEPOSE_HEATMAP: the weighted part segmentation loss.
DENSEPOSE_REGRESSION: the weighted part surface coordinate loss.
"""
dp_heatmap_loss, dp_regression_loss = (
self._compute_densepose_part_and_coordinate_losses(
input_height=input_height,
input_width=input_width,
part_predictions=prediction_dict[DENSEPOSE_HEATMAP],
surface_coord_predictions=prediction_dict[DENSEPOSE_REGRESSION]))
loss_dict = {}
loss_dict[DENSEPOSE_HEATMAP] = (
self._densepose_params.part_loss_weight * dp_heatmap_loss)
loss_dict[DENSEPOSE_REGRESSION] = (
self._densepose_params.coordinate_loss_weight * dp_regression_loss)
return loss_dict
def _compute_densepose_part_and_coordinate_losses(
self, input_height, input_width, part_predictions,
surface_coord_predictions):
"""Computes the individual losses for the DensePose task.
Args:
input_height: An integer scalar tensor representing input image height.
input_width: An integer scalar tensor representing input image width.
part_predictions: A list of float tensors of shape [batch_size,
out_height, out_width, num_parts].
surface_coord_predictions: A list of float tensors of shape [batch_size,
out_height, out_width, 2 * num_parts].
Returns:
A tuple with two scalar loss tensors: part_prediction_loss and
surface_coord_loss.
"""
gt_dp_num_points_list = self.groundtruth_lists(
fields.BoxListFields.densepose_num_points)
gt_dp_part_ids_list = self.groundtruth_lists(
fields.BoxListFields.densepose_part_ids)
gt_dp_surface_coords_list = self.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords)
gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights)
assigner = self._target_assigner_dict[DENSEPOSE_TASK]
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
assigner.assign_part_and_coordinate_targets(
height=input_height,
width=input_width,
gt_dp_num_points_list=gt_dp_num_points_list,
gt_dp_part_ids_list=gt_dp_part_ids_list,
gt_dp_surface_coords_list=gt_dp_surface_coords_list,
gt_weights_list=gt_weights_list))
part_prediction_loss = 0
surface_coord_loss = 0
classification_loss_fn = self._densepose_params.classification_loss
localization_loss_fn = self._densepose_params.localization_loss
num_predictions = float(len(part_predictions))
num_valid_points = tf.math.count_nonzero(batch_weights)
num_valid_points = tf.cast(tf.math.maximum(num_valid_points, 1), tf.float32)
for part_pred, surface_coord_pred in zip(part_predictions,
surface_coord_predictions):
# Potentially upsample the feature maps, so that better quality (i.e.
# higher res) groundtruth can be applied.
if self._densepose_params.upsample_to_input_res:
part_pred = tf.keras.layers.UpSampling2D(
self._stride, interpolation=self._densepose_params.upsample_method)(
part_pred)
surface_coord_pred = tf.keras.layers.UpSampling2D(
self._stride, interpolation=self._densepose_params.upsample_method)(
surface_coord_pred)
# Compute the part prediction loss.
part_pred = cn_assigner.get_batch_predictions_from_indices(
part_pred, batch_indices[:, 0:3])
part_prediction_loss += classification_loss_fn(
part_pred[:, tf.newaxis, :],
batch_part_ids[:, tf.newaxis, :],
weights=batch_weights[:, tf.newaxis, tf.newaxis])
# Compute the surface coordinate loss.
batch_size, out_height, out_width, _ = _get_shape(
surface_coord_pred, 4)
surface_coord_pred = tf.reshape(
surface_coord_pred, [batch_size, out_height, out_width, -1, 2])
surface_coord_pred = cn_assigner.get_batch_predictions_from_indices(
surface_coord_pred, batch_indices)
surface_coord_loss += localization_loss_fn(
surface_coord_pred,
batch_surface_coords,
weights=batch_weights[:, tf.newaxis])
part_prediction_loss = tf.reduce_sum(part_prediction_loss) / (
num_predictions * num_valid_points)
surface_coord_loss = tf.reduce_sum(surface_coord_loss) / (
num_predictions * num_valid_points)
return part_prediction_loss, surface_coord_loss
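
# Both losses above are normalized by the number of feature outputs and by
# the number of valid (non-padded) sampled points. Illustratively, with two
# feature outputs and a summed part loss of 12.0 over 4 valid points:
num_predictions, num_valid_points = 2.0, 4.0
part_prediction_loss = 12.0 / (num_predictions * num_valid_points)  # 1.5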
def preprocess(self, inputs):
outputs = shape_utils.resize_images_and_return_shapes(
inputs, self._image_resizer_fn)
......@@ -1909,6 +2258,13 @@ class CenterNetMetaArch(model.DetectionModel):
'segmentation/heatmap' - [optional] A list of size num_feature_outputs
holding float tensors of size [batch_size, output_height,
output_width, num_classes] representing the mask logits.
'densepose/heatmap' - [optional] A list of size num_feature_outputs
holding float tensors of size [batch_size, output_height,
output_width, num_parts] representing the mask logits for each part.
'densepose/regression' - [optional] A list of size num_feature_outputs
holding float tensors of size [batch_size, output_height,
output_width, 2 * num_parts] representing the DensePose surface
coordinate predictions.
Note the $TASK_NAME is provided by the KeypointEstimation namedtuple
used to differentiate between different keypoint tasks.
"""
......@@ -1938,10 +2294,16 @@ class CenterNetMetaArch(model.DetectionModel):
scope: Optional scope name.
Returns:
A dictionary mapping the keys ['Loss/object_center', 'Loss/box/scale',
'Loss/box/offset', 'Loss/$TASK_NAME/keypoint/heatmap',
'Loss/$TASK_NAME/keypoint/offset',
'Loss/$TASK_NAME/keypoint/regression', 'Loss/segmentation/heatmap'] to
A dictionary mapping the keys [
'Loss/object_center',
'Loss/box/scale', (optional)
'Loss/box/offset', (optional)
'Loss/$TASK_NAME/keypoint/heatmap', (optional)
'Loss/$TASK_NAME/keypoint/offset', (optional)
'Loss/$TASK_NAME/keypoint/regression', (optional)
'Loss/segmentation/heatmap', (optional)
'Loss/densepose/heatmap', (optional)
        'Loss/densepose/regression'] (optional)
      to scalar tensors corresponding to the losses for different tasks. Note the
$TASK_NAME is provided by the KeypointEstimation namedtuple used to
differentiate between different keypoint tasks.
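
      For illustration (editor's sketch, not part of this change): since every
      value in the returned dictionary is a scalar tensor, a caller can reduce
      it to a single training loss; `model`, `prediction_dict`, and
      `true_image_shapes` below are hypothetical.

        losses_dict = model.loss(prediction_dict, true_image_shapes)
        total_loss = tf.add_n(list(losses_dict.values()))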
......@@ -1999,6 +2361,16 @@ class CenterNetMetaArch(model.DetectionModel):
seg_losses[key] = seg_losses[key] * self._mask_params.task_loss_weight
losses.update(seg_losses)
if self._densepose_params is not None:
densepose_losses = self._compute_densepose_losses(
input_height=input_height,
input_width=input_width,
prediction_dict=prediction_dict)
for key in densepose_losses:
densepose_losses[key] = (
densepose_losses[key] * self._densepose_params.task_loss_weight)
losses.update(densepose_losses)
# Prepend the LOSS_KEY_PREFIX to the keys in the dictionary such that the
# losses will be grouped together in Tensorboard.
return dict([('%s/%s' % (LOSS_KEY_PREFIX, key), val)
......@@ -2033,9 +2405,14 @@ class CenterNetMetaArch(model.DetectionModel):
invalid keypoints have their coordinates and scores set to 0.0.
detection_keypoint_scores: (Optional) A float tensor of shape [batch,
max_detection, num_keypoints] with scores for each keypoint.
detection_masks: (Optional) An int tensor of shape [batch,
max_detections, mask_height, mask_width] with binarized masks for each
detection.
detection_masks: (Optional) A uint8 tensor of shape [batch,
max_detections, mask_height, mask_width] with masks for each
detection. Background is specified with 0, and foreground is specified
with positive integers (1 for standard instance segmentation mask, and
1-indexed parts for DensePose task).
detection_surface_coords: (Optional) A float32 tensor of shape [batch,
max_detection, mask_height, mask_width, 2] with DensePose surface
coordinates, in (v, u) format.
"""
object_center_prob = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])
# Get x, y and channel indices corresponding to the top indices in the class
......@@ -2076,14 +2453,27 @@ class CenterNetMetaArch(model.DetectionModel):
if self._mask_params:
masks = tf.nn.sigmoid(prediction_dict[SEGMENTATION_HEATMAP][-1])
instance_masks = convert_strided_predictions_to_instance_masks(
boxes, classes, masks, self._stride, self._mask_params.mask_height,
self._mask_params.mask_width, true_image_shapes,
self._mask_params.score_threshold)
postprocess_dict.update({
fields.DetectionResultFields.detection_masks:
instance_masks
})
densepose_part_heatmap, densepose_surface_coords = None, None
densepose_class_index = 0
if self._densepose_params:
densepose_part_heatmap = prediction_dict[DENSEPOSE_HEATMAP][-1]
densepose_surface_coords = prediction_dict[DENSEPOSE_REGRESSION][-1]
densepose_class_index = self._densepose_params.class_id
instance_masks, surface_coords = (
convert_strided_predictions_to_instance_masks(
boxes, classes, masks, true_image_shapes,
densepose_part_heatmap, densepose_surface_coords,
stride=self._stride, mask_height=self._mask_params.mask_height,
mask_width=self._mask_params.mask_width,
score_threshold=self._mask_params.score_threshold,
densepose_class_index=densepose_class_index))
postprocess_dict[
fields.DetectionResultFields.detection_masks] = instance_masks
if self._densepose_params:
postprocess_dict[
fields.DetectionResultFields.detection_surface_coords] = (
surface_coords)
return postprocess_dict
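
  # Editor's sketch (not part of this change): decoding the DensePose outputs
  # returned above for a single detection. detection_masks holds 0 for
  # background and 1-indexed part ids for foreground pixels, while
  # detection_surface_coords holds the matching (v, u) surface coordinates.
  # `detections` is a hypothetical postprocess() output, read with NumPy:
  #
  #   part_mask = detections['detection_masks'][0, 0]    # [H, W], uint8
  #   vu = detections['detection_surface_coords'][0, 0]  # [H, W, 2]
  #   ys, xs = np.where(part_mask > 0)
  #   for y, x in zip(ys, xs):
  #     part_id = part_mask[y, x] - 1  # back to a 0-indexed part id
  #     v, u = vu[y, x]
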
def _postprocess_keypoints(self, prediction_dict, classes, y_indices,
......@@ -2368,7 +2758,9 @@ class CenterNetMetaArch(model.DetectionModel):
return {'feature_extractor': self._feature_extractor.get_base_model()}
if fine_tune_checkpoint_type == 'detection':
return {'feature_extractor': self._feature_extractor.get_model()}
fake_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor)
return {'model': fake_model}
else:
raise ValueError('Not supported fine tune checkpoint type - {}'.format(
......
......@@ -266,7 +266,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
masks_np[0, :, :3, 1] = 1 # Class 1.
masks = tf.constant(masks_np)
true_image_shapes = tf.constant([[6, 8, 3]])
instance_masks = cnma.convert_strided_predictions_to_instance_masks(
instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
boxes, classes, masks, stride=2, mask_height=2, mask_width=2,
true_image_shapes=true_image_shapes)
return instance_masks
......@@ -289,6 +289,104 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
])
np.testing.assert_array_equal(expected_instance_masks, instance_masks)
def test_convert_strided_predictions_raises_error_with_one_tensor(self):
def graph_fn():
boxes = tf.constant(
[
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.5, 0.5, 1.0],
[0.0, 0.0, 0.0, 0.0]],
], tf.float32)
classes = tf.constant(
[
[0, 1, 0],
], tf.int32)
masks_np = np.zeros((1, 4, 4, 2), dtype=np.float32)
masks_np[0, :, 2:, 0] = 1 # Class 0.
masks_np[0, :, :3, 1] = 1 # Class 1.
masks = tf.constant(masks_np)
true_image_shapes = tf.constant([[6, 8, 3]])
densepose_part_heatmap = tf.random.uniform(
[1, 4, 4, 24])
instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
boxes, classes, masks, true_image_shapes,
densepose_part_heatmap=densepose_part_heatmap,
densepose_surface_coords=None)
return instance_masks
with self.assertRaises(ValueError):
self.execute_cpu(graph_fn, [])
def test_crop_and_threshold_masks(self):
boxes_np = np.array(
[[0., 0., 0.5, 0.5],
[0.25, 0.25, 1.0, 1.0]], dtype=np.float32)
classes_np = np.array([0, 2], dtype=np.int32)
masks_np = np.zeros((4, 4, _NUM_CLASSES), dtype=np.float32)
masks_np[0, 0, 0] = 0.8
masks_np[1, 1, 0] = 0.6
masks_np[3, 3, 2] = 0.7
part_heatmap_np = np.zeros((4, 4, _DENSEPOSE_NUM_PARTS), dtype=np.float32)
part_heatmap_np[0, 0, 4] = 1
part_heatmap_np[0, 0, 2] = 0.6 # Lower scoring.
part_heatmap_np[1, 1, 8] = 0.2
part_heatmap_np[3, 3, 4] = 0.5
surf_coords_np = np.zeros((4, 4, 2 * _DENSEPOSE_NUM_PARTS),
dtype=np.float32)
surf_coords_np[:, :, 8:10] = 0.2, 0.9
surf_coords_np[:, :, 16:18] = 0.3, 0.5
true_height, true_width = 10, 10
input_height, input_width = 10, 10
mask_height = 4
mask_width = 4
def graph_fn():
elems = [
tf.constant(boxes_np),
tf.constant(classes_np),
tf.constant(masks_np),
tf.constant(part_heatmap_np),
tf.constant(surf_coords_np),
tf.constant(true_height, dtype=tf.int32),
tf.constant(true_width, dtype=tf.int32)
]
part_masks, surface_coords = cnma.crop_and_threshold_masks(
elems, input_height, input_width, mask_height=mask_height,
mask_width=mask_width, densepose_class_index=0)
return part_masks, surface_coords
part_masks, surface_coords = self.execute_cpu(graph_fn, [])
expected_part_masks = np.zeros((2, 4, 4), dtype=np.uint8)
    expected_part_masks[0, 0, 0] = 5  # Recall part ids are 1-indexed in output.
    expected_part_masks[0, 2, 2] = 9  # Recall part ids are 1-indexed in output.
expected_part_masks[1, 3, 3] = 1 # Standard instance segmentation mask.
expected_surface_coords = np.zeros((2, 4, 4, 2), dtype=np.float32)
expected_surface_coords[0, 0, 0, :] = 0.2, 0.9
expected_surface_coords[0, 2, 2, :] = 0.3, 0.5
np.testing.assert_allclose(expected_part_masks, part_masks)
np.testing.assert_allclose(expected_surface_coords, surface_coords)
def test_gather_surface_coords_for_parts(self):
surface_coords_cropped_np = np.zeros((2, 5, 5, _DENSEPOSE_NUM_PARTS, 2),
dtype=np.float32)
surface_coords_cropped_np[0, 0, 0, 5] = 0.3, 0.4
surface_coords_cropped_np[0, 1, 0, 9] = 0.5, 0.6
highest_scoring_part_np = np.zeros((2, 5, 5), dtype=np.int32)
highest_scoring_part_np[0, 0, 0] = 5
highest_scoring_part_np[0, 1, 0] = 9
def graph_fn():
surface_coords_cropped = tf.constant(surface_coords_cropped_np,
tf.float32)
highest_scoring_part = tf.constant(highest_scoring_part_np, tf.int32)
surface_coords_gathered = cnma.gather_surface_coords_for_parts(
surface_coords_cropped, highest_scoring_part)
return surface_coords_gathered
surface_coords_gathered = self.execute_cpu(graph_fn, [])
np.testing.assert_allclose([0.3, 0.4], surface_coords_gathered[0, 0, 0])
np.testing.assert_allclose([0.5, 0.6], surface_coords_gathered[0, 1, 0])
def test_top_k_feature_map_locations(self):
feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
feature_map_np[0, 2, 0, 1] = 1.0
......@@ -535,6 +633,8 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
keypoint_heatmap_np[1, 0, 1, 1] = 0.9
keypoint_heatmap_np[1, 2, 0, 1] = 0.8
    # Note that the keypoint offsets are now per keypoint (as opposed to
    # keypoint agnostic, as in test_keypoint_candidate_prediction).
keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 4), dtype=np.float32)
keypoint_heatmap_offsets_np[0, 0, 0] = [0.5, 0.25, 0.0, 0.0]
keypoint_heatmap_offsets_np[0, 2, 1] = [-0.25, 0.5, 0.0, 0.0]
......@@ -949,6 +1049,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
_NUM_CLASSES = 10
_KEYPOINT_INDICES = [0, 1, 2, 3]
_NUM_KEYPOINTS = len(_KEYPOINT_INDICES)
_DENSEPOSE_NUM_PARTS = 24
_TASK_NAME = 'human_pose'
......@@ -991,6 +1092,20 @@ def get_fake_mask_params():
mask_width=4)
def get_fake_densepose_params():
"""Returns the fake DensePose estimation parameter namedtuple."""
return cnma.DensePoseParams(
class_id=1,
classification_loss=losses.WeightedSoftmaxClassificationLoss(),
localization_loss=losses.L1LocalizationLoss(),
part_loss_weight=1.0,
coordinate_loss_weight=1.0,
num_parts=_DENSEPOSE_NUM_PARTS,
task_loss_weight=1.0,
upsample_to_input_res=True,
upsample_method='nearest')
def build_center_net_meta_arch(build_resnet=False):
"""Builds the CenterNet meta architecture."""
if build_resnet:
......@@ -1018,7 +1133,8 @@ def build_center_net_meta_arch(build_resnet=False):
object_center_params=get_fake_center_params(),
object_detection_params=get_fake_od_params(),
keypoint_params_dict={_TASK_NAME: get_fake_kp_params()},
mask_params=get_fake_mask_params())
mask_params=get_fake_mask_params(),
densepose_params=get_fake_densepose_params())
def _logit(p):
......@@ -1102,6 +1218,16 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
fake_feature_map)
self.assertEqual((4, 128, 128, _NUM_CLASSES), output.shape)
# "densepose parts" head:
output = model._prediction_head_dict[cnma.DENSEPOSE_HEATMAP][-1](
fake_feature_map)
self.assertEqual((4, 128, 128, _DENSEPOSE_NUM_PARTS), output.shape)
# "densepose surface coordinates" head:
output = model._prediction_head_dict[cnma.DENSEPOSE_REGRESSION][-1](
fake_feature_map)
self.assertEqual((4, 128, 128, 2 * _DENSEPOSE_NUM_PARTS), output.shape)
def test_initialize_target_assigners(self):
model = build_center_net_meta_arch()
assigner_dict = model._initialize_target_assigners(
......@@ -1125,6 +1251,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertIsInstance(assigner_dict[cnma.SEGMENTATION_TASK],
cn_assigner.CenterNetMaskTargetAssigner)
# DensePose estimation target assigner:
self.assertIsInstance(assigner_dict[cnma.DENSEPOSE_TASK],
cn_assigner.CenterNetDensePoseTargetAssigner)
def test_predict(self):
"""Test the predict function."""
......@@ -1145,6 +1275,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
(2, 32, 32, 2))
self.assertEqual(prediction_dict[cnma.SEGMENTATION_HEATMAP][0].shape,
(2, 32, 32, _NUM_CLASSES))
self.assertEqual(prediction_dict[cnma.DENSEPOSE_HEATMAP][0].shape,
(2, 32, 32, _DENSEPOSE_NUM_PARTS))
self.assertEqual(prediction_dict[cnma.DENSEPOSE_REGRESSION][0].shape,
(2, 32, 32, 2 * _DENSEPOSE_NUM_PARTS))
def test_loss(self):
"""Test the loss function."""
......@@ -1157,7 +1291,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
groundtruth_keypoints_list=groundtruth_dict[
fields.BoxListFields.keypoints],
groundtruth_masks_list=groundtruth_dict[
fields.BoxListFields.masks])
fields.BoxListFields.masks],
groundtruth_dp_num_points_list=groundtruth_dict[
fields.BoxListFields.densepose_num_points],
groundtruth_dp_part_ids_list=groundtruth_dict[
fields.BoxListFields.densepose_part_ids],
groundtruth_dp_surface_coords_list=groundtruth_dict[
fields.BoxListFields.densepose_surface_coords])
prediction_dict = get_fake_prediction_dict(
input_height=16, input_width=32, stride=4)
......@@ -1193,6 +1333,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.SEGMENTATION_HEATMAP)])
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.DENSEPOSE_HEATMAP)])
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.DENSEPOSE_REGRESSION)])
@parameterized.parameters(
{'target_class_id': 1},
......@@ -1230,6 +1376,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
segmentation_heatmap[:, 14:18, 14:18, target_class_id] = 1.0
segmentation_heatmap = _logit(segmentation_heatmap)
dp_part_ind = 4
dp_part_heatmap = np.zeros((1, 32, 32, _DENSEPOSE_NUM_PARTS),
dtype=np.float32)
dp_part_heatmap[0, 14:18, 14:18, dp_part_ind] = 1.0
dp_part_heatmap = _logit(dp_part_heatmap)
dp_surf_coords = np.random.randn(1, 32, 32, 2 * _DENSEPOSE_NUM_PARTS)
class_center = tf.constant(class_center)
height_width = tf.constant(height_width)
offset = tf.constant(offset)
......@@ -1237,6 +1391,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
segmentation_heatmap = tf.constant(segmentation_heatmap, dtype=tf.float32)
dp_part_heatmap = tf.constant(dp_part_heatmap, dtype=tf.float32)
dp_surf_coords = tf.constant(dp_surf_coords, dtype=tf.float32)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
......@@ -1249,6 +1405,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
[keypoint_regression],
cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap],
cnma.DENSEPOSE_HEATMAP: [dp_part_heatmap],
cnma.DENSEPOSE_REGRESSION: [dp_surf_coords]
}
def graph_fn():
......@@ -1271,12 +1429,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual([1, max_detection, 4, 4],
detections['detection_masks'].shape)
    # There should be some section of the first mask (corresponding to the only
    # detection) with non-zero mask values.
self.assertGreater(np.sum(detections['detection_masks'][0, 0, :, :] > 0), 0)
# Masks should be empty for everything but the first detection.
self.assertAllEqual(
detections['detection_masks'][0, 1:, :, :],
np.zeros_like(detections['detection_masks'][0, 1:, :, :]))
self.assertAllEqual(
detections['detection_surface_coords'][0, 1:, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 1:, :, :]))
if target_class_id == 1:
expected_kpts_for_obj_0 = np.array(
......@@ -1287,6 +1446,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
expected_kpts_for_obj_0, rtol=1e-6)
np.testing.assert_allclose(detections['detection_keypoint_scores'][0][0],
expected_kpt_scores_for_obj_0, rtol=1e-6)
# First detection has DensePose parts.
self.assertSameElements(
np.unique(detections['detection_masks'][0, 0, :, :]),
set([0, dp_part_ind + 1]))
self.assertGreater(np.sum(np.abs(detections['detection_surface_coords'])),
0.0)
else:
# All keypoint outputs should be zeros.
np.testing.assert_allclose(
......@@ -1297,6 +1462,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
detections['detection_keypoint_scores'][0][0],
np.zeros([num_keypoints], np.float),
rtol=1e-6)
# Binary segmentation mask.
self.assertSameElements(
np.unique(detections['detection_masks'][0, 0, :, :]),
set([0, 1]))
# No DensePose surface coordinates.
np.testing.assert_allclose(
detections['detection_surface_coords'][0, 0, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))
def test_get_instance_indices(self):
classes = tf.constant([[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32)
......@@ -1353,6 +1526,17 @@ def get_fake_prediction_dict(input_height, input_width, stride):
mask_heatmap[0, 2, 4, 1] = 1.0
mask_heatmap = _logit(mask_heatmap)
densepose_heatmap = np.zeros((2, output_height, output_width,
_DENSEPOSE_NUM_PARTS), dtype=np.float32)
densepose_heatmap[0, 2, 4, 5] = 1.0
densepose_heatmap = _logit(densepose_heatmap)
densepose_regression = np.zeros((2, output_height, output_width,
2 * _DENSEPOSE_NUM_PARTS), dtype=np.float32)
# The surface coordinate indices for part index 5 are:
# (5 * 2, 5 * 2 + 1), or (10, 11).
densepose_regression[0, 2, 4, 10:12] = 0.4, 0.7
prediction_dict = {
'preprocessed_inputs':
tf.zeros((2, input_height, input_width, 3)),
......@@ -1383,6 +1567,14 @@ def get_fake_prediction_dict(input_height, input_width, stride):
cnma.SEGMENTATION_HEATMAP: [
tf.constant(mask_heatmap),
tf.constant(mask_heatmap)
],
cnma.DENSEPOSE_HEATMAP: [
tf.constant(densepose_heatmap),
tf.constant(densepose_heatmap),
],
cnma.DENSEPOSE_REGRESSION: [
tf.constant(densepose_regression),
tf.constant(densepose_regression),
]
}
return prediction_dict
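

# Editor's sketch (not part of this change): the regression tensor packs the
# (v, u) channels for 0-indexed part k at indices (2 * k, 2 * k + 1), which is
# why part index 5 above lands in channels 10:12. A hypothetical helper:
def _surface_coord_channels(part_index):
  """Returns the (v, u) channel indices for a 0-indexed DensePose part."""
  return 2 * part_index, 2 * part_index + 1
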
......@@ -1427,12 +1619,30 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
tf.constant(mask),
tf.zeros_like(mask),
]
densepose_num_points = [
tf.constant([1], dtype=tf.int32),
tf.constant([0], dtype=tf.int32),
]
densepose_part_ids = [
tf.constant([[5, 0, 0]], dtype=tf.int32),
tf.constant([[0, 0, 0]], dtype=tf.int32),
]
densepose_surface_coords_np = np.zeros((1, 3, 4), dtype=np.float32)
densepose_surface_coords_np[0, 0, :] = 0.55, 0.55, 0.4, 0.7
densepose_surface_coords = [
tf.constant(densepose_surface_coords_np),
tf.zeros_like(densepose_surface_coords_np)
]
groundtruth_dict = {
fields.BoxListFields.boxes: boxes,
fields.BoxListFields.weights: weights,
fields.BoxListFields.classes: classes,
fields.BoxListFields.keypoints: keypoints,
fields.BoxListFields.masks: masks,
fields.BoxListFields.densepose_num_points: densepose_num_points,
fields.BoxListFields.densepose_part_ids: densepose_part_ids,
fields.BoxListFields.densepose_surface_coords:
densepose_surface_coords,
fields.InputDataFields.groundtruth_labeled_classes: labeled_classes,
}
return groundtruth_dict
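
# Editor's note (not part of this change): each groundtruth DensePose point
# above is stored as (y, x, v, u) -- a normalized image location followed by
# its surface coordinates -- so the single valid point unpacks as:
#
#   y, x, v, u = densepose_surface_coords_np[0, 0, :]  # 0.55, 0.55, 0.4, 0.7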
......