Commit 44e7092c authored by stephenwu

Merge branch 'master' of https://github.com/tensorflow/models into AXg

parents 431a9ca3 59434199
......@@ -94,22 +94,37 @@ class DetectionInferenceModule(tf.Module):
def _get_side_names_from_zip(self, zipped_side_inputs):
return [side[2] for side in zipped_side_inputs]
def _run_inference_on_images(self, image, **kwargs):
def _preprocess_input(self, batch_input, decode_fn):
# Input preprocessing happens on the CPU. We don't need to specify the device
# placement explicitly as it is automatically handled by TF.
def _decode_and_preprocess(single_input):
image = decode_fn(single_input)
image = tf.cast(image, tf.float32)
image, true_shape = self._model.preprocess(image[tf.newaxis, :, :, :])
return image[0], true_shape[0]
images, true_shapes = tf.map_fn(
_decode_and_preprocess,
elems=batch_input,
parallel_iterations=32,
back_prop=False,
fn_output_signature=(tf.float32, tf.int32))
return images, true_shapes
def _run_inference_on_images(self, images, true_shapes, **kwargs):
"""Cast image to float and run inference.
Args:
image: uint8 Tensor of shape [1, None, None, 3].
images: float32 Tensor of shape [None, None, None, 3].
true_shapes: int32 Tensor of shape [batch, 3].
**kwargs: additional keyword arguments.
Returns:
Tensor dictionary holding detections.
"""
label_id_offset = 1
image = tf.cast(image, tf.float32)
image, shapes = self._model.preprocess(image)
prediction_dict = self._model.predict(image, shapes, **kwargs)
detections = self._model.postprocess(prediction_dict, shapes)
prediction_dict = self._model.predict(images, true_shapes, **kwargs)
detections = self._model.postprocess(prediction_dict, true_shapes)
classes_field = fields.DetectionResultFields.detection_classes
detections[classes_field] = (
tf.cast(detections[classes_field], tf.float32) + label_id_offset)
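The per-example decode-and-preprocess step above relies on `tf.map_fn` returning a pair of tensors via `fn_output_signature`. Below is a minimal, self-contained sketch of that pattern, with a fixed-size resize standing in for the model's `preprocess`; all names in the sketch are illustrative and not part of this change:

```python
import tensorflow as tf

def _toy_decode_and_preprocess(single_image):
  # Stand-in for decode_fn + model.preprocess: resize to a fixed size and
  # report the original (pre-resize) shape, mirroring the
  # (image, true_shape) contract used by _preprocess_input.
  resized = tf.image.resize(tf.cast(single_image, tf.float32), (320, 320))
  return resized, tf.shape(single_image)

batch = tf.random.uniform([4, 200, 100, 3], maxval=255.0)
images, true_shapes = tf.map_fn(
    _toy_decode_and_preprocess,
    elems=batch,
    parallel_iterations=32,
    fn_output_signature=(tf.float32, tf.int32))
print(images.shape)         # (4, 320, 320, 3)
print(true_shapes.numpy())  # [200 100 3] repeated for each example
```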
......@@ -144,7 +159,8 @@ class DetectionFromImageModule(DetectionInferenceModule):
def call_func(input_tensor, *side_inputs):
kwargs = dict(zip(self._side_input_names, side_inputs))
return self._run_inference_on_images(input_tensor, **kwargs)
images, true_shapes = self._preprocess_input(input_tensor, lambda x: x)
return self._run_inference_on_images(images, true_shapes, **kwargs)
self.__call__ = tf.function(call_func, input_signature=sig)
......@@ -154,44 +170,43 @@ class DetectionFromImageModule(DetectionInferenceModule):
zipped_side_inputs)
def get_true_shapes(input_tensor):
input_shape = tf.shape(input_tensor)
batch = input_shape[0]
image_shape = input_shape[1:]
true_shapes = tf.tile(image_shape[tf.newaxis, :], [batch, 1])
return true_shapes
class DetectionFromFloatImageModule(DetectionInferenceModule):
"""Detection Inference Module for float image inputs."""
@tf.function(
input_signature=[
tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.float32)])
tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.float32)])
def __call__(self, input_tensor):
return self._run_inference_on_images(input_tensor)
images, true_shapes = self._preprocess_input(input_tensor, lambda x: x)
return self._run_inference_on_images(images,
true_shapes)
class DetectionFromEncodedImageModule(DetectionInferenceModule):
"""Detection Inference Module for encoded image string inputs."""
@tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)])
@tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def __call__(self, input_tensor):
with tf.device('cpu:0'):
image = tf.map_fn(
_decode_image,
elems=input_tensor,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False)
return self._run_inference_on_images(image)
images, true_shapes = self._preprocess_input(input_tensor, _decode_image)
return self._run_inference_on_images(images, true_shapes)
class DetectionFromTFExampleModule(DetectionInferenceModule):
"""Detection Inference Module for TF.Example inputs."""
@tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)])
@tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def __call__(self, input_tensor):
with tf.device('cpu:0'):
image = tf.map_fn(
_decode_tf_example,
elems=input_tensor,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False)
return self._run_inference_on_images(image)
images, true_shapes = self._preprocess_input(input_tensor,
_decode_tf_example)
return self._run_inference_on_images(images, true_shapes)
DETECTION_MODULE_MAP = {
'image_tensor': DetectionFromImageModule,
......
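Because each `__call__` signature above now uses a dynamic batch dimension (`shape=[None, ...]`) instead of a fixed batch of 1, a model exported with any of these modules accepts arbitrary batch sizes at inference time. A hedged usage sketch for the float-image case; the export directory and the export step itself are assumptions, not part of this diff:

```python
import tensorflow as tf

# Assumed: a model was previously exported with the float-image module
# (DetectionFromFloatImageModule) to this directory.
detect_fn = tf.saved_model.load('/tmp/exported_model/saved_model')

images = tf.zeros([8, 320, 320, 3], dtype=tf.float32)  # batch of 8 now allowed
detections = detect_fn(images)
print(detections['num_detections'].shape)  # (8,)
```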
......@@ -307,6 +307,14 @@ def transform_input_data(tensor_dict,
out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
out_tensor_dict[flds_gt_kpt][:, :, 0],
dtype=tf.bool)
flds_gt_kpt_depth = fields.InputDataFields.groundtruth_keypoint_depths
flds_gt_kpt_depth_weight = (
fields.InputDataFields.groundtruth_keypoint_depth_weights)
if flds_gt_kpt_depth in out_tensor_dict:
out_tensor_dict[flds_gt_kpt_depth] = out_tensor_dict[flds_gt_kpt_depth]
out_tensor_dict[flds_gt_kpt_depth_weight] = out_tensor_dict[
flds_gt_kpt_depth_weight]
out_tensor_dict[flds_gt_kpt_weights] = (
keypoint_ops.keypoint_weights_from_visibilities(
out_tensor_dict[flds_gt_kpt_vis],
......@@ -506,6 +514,15 @@ def pad_input_data_to_static_shapes(tensor_dict,
padding_shapes[input_fields.
groundtruth_keypoint_visibilities] = padding_shape
if fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict:
tensor_shape = tensor_dict[fields.InputDataFields.
groundtruth_keypoint_depths].shape
padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
padding_shapes[fields.InputDataFields.
groundtruth_keypoint_depths] = padding_shape
padding_shapes[fields.InputDataFields.
groundtruth_keypoint_depth_weights] = padding_shape
if input_fields.groundtruth_keypoint_weights in tensor_dict:
tensor_shape = (
tensor_dict[input_fields.groundtruth_keypoint_weights].shape)
......@@ -587,6 +604,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
in tensor_dict)
include_keypoint_visibilities = (
fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict)
include_keypoint_depths = (
fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict)
include_label_weights = (fields.InputDataFields.groundtruth_weights
in tensor_dict)
include_label_confidences = (fields.InputDataFields.groundtruth_confidences
......@@ -606,7 +625,8 @@ def augment_input_data(tensor_dict, data_augmentation_options):
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints,
include_keypoint_visibilities=include_keypoint_visibilities,
include_dense_pose=include_dense_pose))
include_dense_pose=include_dense_pose,
include_keypoint_depths=include_keypoint_depths))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
tensor_dict[fields.InputDataFields.image], axis=0)
return tensor_dict
......@@ -628,6 +648,8 @@ def _get_labels_dict(input_dict):
fields.InputDataFields.groundtruth_confidences,
fields.InputDataFields.groundtruth_labeled_classes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_depths,
fields.InputDataFields.groundtruth_keypoint_depth_weights,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_is_crowd,
......
......@@ -1420,6 +1420,49 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase):
[[[0., 0., 0., 0.,], [0., 0., 0., 0.,]],
[[0.1, 0.1, 0.3, 0.4,], [0.6, 0.4, 0.6, 0.7,]]])
def test_groundtruth_keypoint_depths(self):
def graph_fn():
tensor_dict = {
fields.InputDataFields.image:
tf.constant(np.random.rand(100, 50, 3).astype(np.float32)),
fields.InputDataFields.groundtruth_boxes:
tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]],
np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([1, 2], np.int32)),
fields.InputDataFields.groundtruth_keypoints:
tf.constant([[[0.1, 0.2], [0.3, 0.4]],
[[0.5, 0.6], [0.7, 0.8]]]),
fields.InputDataFields.groundtruth_keypoint_visibilities:
tf.constant([[True, False], [True, True]]),
fields.InputDataFields.groundtruth_keypoint_depths:
tf.constant([[1.0, 0.9], [0.8, 0.7]]),
fields.InputDataFields.groundtruth_keypoint_depth_weights:
tf.constant([[0.7, 0.8], [0.9, 1.0]]),
}
num_classes = 3
keypoint_type_weight = [1.0, 2.0]
input_transformation_fn = functools.partial(
inputs.transform_input_data,
model_preprocess_fn=_fake_resize50_preprocess_fn,
image_resizer_fn=_fake_image_resizer_fn,
num_classes=num_classes,
keypoint_type_weight=keypoint_type_weight)
transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict)
return (transformed_inputs[
fields.InputDataFields.groundtruth_keypoint_depths],
transformed_inputs[
fields.InputDataFields.groundtruth_keypoint_depth_weights])
keypoint_depths, keypoint_depth_weights = self.execute_cpu(graph_fn, [])
self.assertAllClose(
keypoint_depths,
[[1.0, 0.9], [0.8, 0.7]])
self.assertAllClose(
keypoint_depth_weights,
[[0.7, 0.8], [0.9, 1.0]])
class PadInputDataToStaticShapesFnTest(test_case.TestCase):
......
......@@ -32,6 +32,7 @@ from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as cn_assigner
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
# Number of channels needed to predict size and offsets.
NUM_OFFSET_CHANNELS = 2
......@@ -526,6 +527,125 @@ def prediction_tensors_to_keypoint_candidates(
return keypoint_candidates, keypoint_scores, num_candidates
def prediction_to_single_instance_keypoints(object_heatmap, keypoint_heatmap,
keypoint_offset,
keypoint_regression, stride,
object_center_std_dev,
keypoint_std_dev, kp_params):
"""Postprocess function to predict single instance keypoints.
This is a simplified postprocessing function based on the assumption that
there is only one instance in the image. If there are multiple instances in
the image, the model prefers to predict the one that is closest to the image
center. Here is a high-level description of what this function does:
1) Object heatmap re-weighted by image center Gaussian is used to determine
the instance center.
2) Regressed keypoint locations are retrieved from the instance center. The
Gaussian kernel is applied to the regressed keypoint locations to
re-weight the keypoint heatmap. This is to select the keypoints that are
associated with the center instance without using top_k op.
3) The keypoint locations are computed by the re-weighted keypoint heatmap
and the keypoint offset.
Args:
object_heatmap: A float tensor of shape [1, height, width, 1] representing
the heatmap of the class.
keypoint_heatmap: A float tensor of shape [1, height, width, num_keypoints]
representing the per-keypoint heatmaps.
keypoint_offset: A float tensor of shape [1, height, width, 2] (or [1,
height, width, 2 * num_keypoints] if 'per_keypoint_offset' is set True)
representing the per-keypoint offsets.
keypoint_regression: A float tensor of shape [1, height, width, 2 *
num_keypoints] representing the joint regression prediction.
stride: The stride in the output space.
object_center_std_dev: The standard deviation of the Gaussian mask which is
applied to the object_heatmap. The goal is to upweight the instance that
is closer to the image center. Expressed in units of input image pixels.
keypoint_std_dev: The standard deviation of the Gaussian masks which are
applied to the keypoint_heatmap based on the regressed joint location. It
is used to upweight the keypoint joints that belong to the targeted
instance. If keypoint_std_dev contains 1 element, all keypoint joints will
share the same value. Otherwise, it must contain num_keypoints elements,
representing the standard deviation corresponding to each joint.
kp_params: A `KeypointEstimationParams` object with parameters for a single
keypoint class.
Returns:
A tuple of two tensors:
keypoint_candidates: A float tensor with shape [1, 1, num_keypoints, 2]
representing the yx-coordinates of the keypoints in the output feature
map space.
keypoint_scores: A float tensor with shape [1, 1, num_keypoints]
representing the keypoint prediction scores.
Raises:
ValueError: if the input keypoint_std_dev doesn't have valid number of
elements (1 or num_keypoints).
"""
num_keypoints = len(kp_params.keypoint_std_dev)
batch_size, height, width, _ = _get_shape(keypoint_heatmap, 4)
# Apply the Gaussian mask to the image center.
image_center_y = tf.convert_to_tensor([0.5 * height], dtype=tf.float32)
image_center_x = tf.convert_to_tensor([0.5 * width], dtype=tf.float32)
(y_grid, x_grid) = ta_utils.image_shape_to_grids(height, width)
# Mask shape: [1, height, width, 1]
object_mask = tf.expand_dims(
ta_utils.coordinates_to_heatmap(y_grid, x_grid, image_center_y,
image_center_x,
object_center_std_dev / stride,
tf.one_hot(tf.range(1), depth=1)), axis=0)
object_heatmap = tf.math.multiply(object_heatmap, object_mask)
# Pick the highest score and location of the weighted object heatmap.
_, y_indices, x_indices, _ = (
top_k_feature_map_locations(
object_heatmap, max_pool_kernel_size=1, k=1, per_channel=True))
_, num_indices = _get_shape(y_indices, 2)
combined_indices = tf.stack([
_multi_range(batch_size, value_repetitions=num_indices),
tf.reshape(y_indices, [-1]),
tf.reshape(x_indices, [-1])
], axis=1)
# Select the regression vectors from the object center.
selected_regression_flat = tf.gather_nd(keypoint_regression, combined_indices)
# shape: [num_keypoints, 2]
regression_offsets = tf.reshape(selected_regression_flat, [num_keypoints, -1])
(y_reg, x_reg) = tf.unstack(regression_offsets, axis=1)
y_regressed = tf.cast(y_indices, dtype=tf.float32) + y_reg
x_regressed = tf.cast(x_indices, dtype=tf.float32) + x_reg
# Prepare and apply the keypoint heatmap masks.
keypoint_std_dev = [x / stride for x in keypoint_std_dev]
if len(keypoint_std_dev) == 1:
std_dev = tf.convert_to_tensor(
keypoint_std_dev * num_keypoints, dtype=tf.float32)
elif len(keypoint_std_dev) == num_keypoints:
std_dev = tf.convert_to_tensor(
keypoint_std_dev, dtype=tf.float32)
else:
raise ValueError('keypoint_std_dev needs to have length either '
'equal to 1 or num_keypoints.')
channel_onehot = tf.one_hot(tf.range(num_keypoints), depth=num_keypoints)
keypoint_mask = tf.expand_dims(
ta_utils.coordinates_to_heatmap(y_grid, x_grid, y_regressed, x_regressed,
std_dev, channel_onehot), axis=0)
keypoint_predictions = tf.math.multiply(keypoint_heatmap, keypoint_mask)
# Get the keypoint locations/scores:
# keypoint_candidates: [1, 1, num_keypoints, 2]
# keypoint_scores: [1, 1, num_keypoints]
(keypoint_candidates, keypoint_scores,
_) = prediction_tensors_to_keypoint_candidates(
keypoint_predictions,
keypoint_offset,
keypoint_score_threshold=kp_params.keypoint_candidate_score_threshold,
max_pool_kernel_size=kp_params.peak_max_pool_kernel_size,
max_candidates=1)
return keypoint_candidates, keypoint_scores
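A small NumPy sketch of step 1 above: multiplying the object heatmap by a Gaussian centered at the image center lets a plain argmax pick the center-most instance, which is how the function avoids top_k. The numbers are illustrative and mirror the unit test later in this change:

```python
import numpy as np

height, width = 9, 9
heatmap = np.zeros((height, width), np.float32)
heatmap[2, 2] = 1.0   # stronger peak, but far from the image center
heatmap[4, 4] = 0.9   # weaker peak at the image center

y, x = np.mgrid[0:height, 0:width]
std_dev = 2.0
center_gaussian = np.exp(
    -((y - 0.5 * height) ** 2 + (x - 0.5 * width) ** 2) / (2.0 * std_dev ** 2))

reweighted = heatmap * center_gaussian
print(np.unravel_index(np.argmax(heatmap), heatmap.shape))        # (2, 2)
print(np.unravel_index(np.argmax(reweighted), reweighted.shape))  # (4, 4)
```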
def regressed_keypoints_at_object_centers(regressed_keypoint_predictions,
y_indices, x_indices):
"""Returns the regressed keypoints at specified object centers.
......@@ -1776,7 +1896,8 @@ class CenterNetMetaArch(model.DetectionModel):
track_params=None,
temporal_offset_params=None,
use_depthwise=False,
compute_heatmap_sparse=False):
compute_heatmap_sparse=False,
non_max_suppression_fn=None):
"""Initializes a CenterNet model.
Args:
......@@ -1819,6 +1940,7 @@ class CenterNetMetaArch(model.DetectionModel):
the Op that computes the center heatmaps. The sparse version scales
better with number of channels in the heatmap, but in some cases is
known to cause an OOM error. See b/170989061.
non_max_suppression_fn: Optional Non Max Suppression function to apply.
"""
assert object_detection_params or keypoint_params_dict
# Shorten the name for convenience and better formatting.
......@@ -1857,6 +1979,7 @@ class CenterNetMetaArch(model.DetectionModel):
# Will be used in VOD single_frame_meta_arch for tensor reshape.
self._batched_prediction_tensor_names = []
self._non_max_suppression_fn = non_max_suppression_fn
super(CenterNetMetaArch, self).__init__(num_classes)
......@@ -2988,6 +3111,117 @@ class CenterNetMetaArch(model.DetectionModel):
prediction_dict[TEMPORAL_OFFSET][-1])
postprocess_dict[fields.DetectionResultFields.detection_offsets] = offsets
if self._non_max_suppression_fn:
boxes = tf.expand_dims(
postprocess_dict.pop(fields.DetectionResultFields.detection_boxes),
axis=-2)
multiclass_scores = postprocess_dict[
fields.DetectionResultFields.detection_multiclass_scores]
num_valid_boxes = postprocess_dict.pop(
fields.DetectionResultFields.num_detections)
# Remove scores and classes as NMS will compute these from multiclass
# scores.
postprocess_dict.pop(fields.DetectionResultFields.detection_scores)
postprocess_dict.pop(fields.DetectionResultFields.detection_classes)
(nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
num_detections) = self._non_max_suppression_fn(
boxes,
multiclass_scores,
additional_fields=postprocess_dict,
num_valid_boxes=num_valid_boxes)
postprocess_dict = nmsed_additional_fields
postprocess_dict[
fields.DetectionResultFields.detection_boxes] = nmsed_boxes
postprocess_dict[
fields.DetectionResultFields.detection_scores] = nmsed_scores
postprocess_dict[
fields.DetectionResultFields.detection_classes] = nmsed_classes
postprocess_dict[
fields.DetectionResultFields.num_detections] = num_detections
postprocess_dict.update(nmsed_additional_fields)
return postprocess_dict
def postprocess_single_instance_keypoints(self, prediction_dict,
true_image_shapes,
object_center_std_dev,
keypoint_std_dev):
"""Postprocess for predicting single instance keypoints.
This postprocess function is a special case of predicting the keypoint of
a single instance in the image (original CenterNet postprocess supports
multi-instance prediction). Due to these simplifying assumptions, this
postprocessing function achieves a much faster inference time.
Here is a short list of the modifications made in this function:
1) Assume the model predicts keypoints for only a single class.
2) Assume there is only one instance in the image. If multiple instances
appear in the image, the model tends to predict the one that is closer
to the image center (the other ones are considered as background and
are rejected by the model).
3) Avoid using top_k ops in the postprocessing logic since they are slower
than argmax.
4) Predictions other than keypoints (e.g. boxes) are ignored.
5) The input batch size is assumed to be 1.
Args:
prediction_dict: a dictionary holding predicted tensors from "predict"
function.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is of
the form [height, width, channels] indicating the shapes of true images
in the resized images, as resized images can be padded with zeros.
object_center_std_dev: The standard deviation of the Gaussian mask which
is applied to the object_heatmap. The goal is to upweight the instance
that is closer to the image center. Expressed in units of input image
pixels.
keypoint_std_dev: The standard deviation of the Gaussian masks which are
applied to the keypoint_heatmap based on the regressed joint location.
It is used to upweight the keypoint joints that belong to the targeted
instance. If keypoint_std_dev contains one value, then we assume the
same value is applied to all keypoint joints. If keypoint_std_dev is a
list, it must contain num_keypoints elements, representing the standard
deviation corresponding to each joint.
Returns:
detections: a dictionary containing the following fields
detection_keypoints: A float tensor of shape
[1, 1, num_keypoints, 2] with normalized keypoints. Any invalid
keypoints have their coordinates and scores set to 0.0.
detection_keypoint_scores: A float tensor of shape
[1, 1, num_keypoints] with scores for each keypoint.
"""
# The number of keypoint tasks is expected to be 1.
assert len(self._kp_params_dict) == 1
task_name, kp_params = next(iter(self._kp_params_dict.items()))
keypoint_heatmap = tf.nn.sigmoid(prediction_dict[get_keypoint_name(
task_name, KEYPOINT_HEATMAP)][-1])
keypoint_offset = prediction_dict[get_keypoint_name(task_name,
KEYPOINT_OFFSET)][-1]
keypoint_regression = prediction_dict[get_keypoint_name(
task_name, KEYPOINT_REGRESSION)][-1]
object_heatmap = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])
keypoints, keypoint_scores = (
prediction_to_single_instance_keypoints(
object_heatmap=object_heatmap,
keypoint_heatmap=keypoint_heatmap,
keypoint_offset=keypoint_offset,
keypoint_regression=keypoint_regression,
stride=self._stride,
object_center_std_dev=object_center_std_dev,
keypoint_std_dev=keypoint_std_dev,
kp_params=kp_params))
keypoints, keypoint_scores = (
convert_strided_predictions_to_normalized_keypoints(
keypoints,
keypoint_scores,
self._stride,
true_image_shapes,
clip_out_of_frame_keypoints=False))
postprocess_dict = {
fields.DetectionResultFields.detection_keypoints: keypoints,
fields.DetectionResultFields.detection_keypoint_scores: keypoint_scores
}
return postprocess_dict
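A hedged usage sketch of this fast path, assuming a CenterNet model configured with exactly one keypoint task; the std-dev values and variable names are illustrative and mirror the unit test later in this change:

```python
# Assumed to exist: `model` (CenterNetMetaArch with a single keypoint task)
# and `prediction_dict` returned by model.predict(images, true_image_shapes).
detections = model.postprocess_single_instance_keypoints(
    prediction_dict,
    true_image_shapes=tf.constant([[128, 128, 3]]),
    object_center_std_dev=32.0,
    keypoint_std_dev=[16.0])
# detections['detection_keypoints']:       [1, 1, num_keypoints, 2] (normalized)
# detections['detection_keypoint_scores']: [1, 1, num_keypoints]
```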
def _postprocess_embeddings(self, prediction_dict, y_indices, x_indices):
......
......@@ -24,12 +24,14 @@ from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.builders import post_processing_builder
from object_detection.core import losses
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as cn_assigner
from object_detection.meta_architectures import center_net_meta_arch as cnma
from object_detection.models import center_net_resnet_feature_extractor
from object_detection.protos import post_processing_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
......@@ -734,6 +736,75 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np.testing.assert_array_equal(expected_num_keypoint_candidates,
num_keypoint_candidates)
def test_prediction_to_single_instance_keypoints(self):
image_size = (9, 9)
object_heatmap_np = np.zeros((1, image_size[0], image_size[1], 1),
dtype=np.float32)
# This should be picked.
object_heatmap_np[0, 4, 4, 0] = 0.9
# This shouldn't be picked since it's farther away from the center.
object_heatmap_np[0, 2, 2, 0] = 1.0
keypoint_heatmap_np = np.zeros((1, image_size[0], image_size[1], 4),
dtype=np.float32)
# Top-left corner should be picked.
keypoint_heatmap_np[0, 1, 1, 0] = 0.9
keypoint_heatmap_np[0, 4, 4, 0] = 1.0
# Top-right corner should be picked.
keypoint_heatmap_np[0, 1, 7, 1] = 0.9
keypoint_heatmap_np[0, 4, 4, 1] = 1.0
# Bottom-left corner should be picked.
keypoint_heatmap_np[0, 7, 1, 2] = 0.9
keypoint_heatmap_np[0, 4, 4, 2] = 1.0
# Bottom-right corner should be picked.
keypoint_heatmap_np[0, 7, 7, 3] = 0.9
keypoint_heatmap_np[0, 4, 4, 3] = 1.0
keypoint_offset_np = np.zeros((1, image_size[0], image_size[1], 2),
dtype=np.float32)
keypoint_offset_np[0, 1, 1] = [0.5, 0.5]
keypoint_offset_np[0, 1, 7] = [0.5, -0.5]
keypoint_offset_np[0, 7, 1] = [-0.5, 0.5]
keypoint_offset_np[0, 7, 7] = [-0.5, -0.5]
keypoint_regression_np = np.zeros((1, image_size[0], image_size[1], 8),
dtype=np.float32)
keypoint_regression_np[0, 4, 4] = [-3, -3, -3, 3, 3, -3, 3, 3]
kp_params = get_fake_kp_params(num_candidates_per_keypoint=1)
def graph_fn():
object_heatmap = tf.constant(object_heatmap_np, dtype=tf.float32)
keypoint_heatmap = tf.constant(keypoint_heatmap_np, dtype=tf.float32)
keypoint_offset = tf.constant(keypoint_offset_np, dtype=tf.float32)
keypoint_regression = tf.constant(
keypoint_regression_np, dtype=tf.float32)
(keypoint_cands, keypoint_scores) = (
cnma.prediction_to_single_instance_keypoints(
object_heatmap,
keypoint_heatmap,
keypoint_offset,
keypoint_regression,
stride=4,
object_center_std_dev=image_size[0] / 2,
keypoint_std_dev=[image_size[0] / 10],
kp_params=kp_params))
return keypoint_cands, keypoint_scores
(keypoint_cands, keypoint_scores) = self.execute(graph_fn, [])
expected_keypoint_candidates = [[[
[1.5, 1.5], # top-left
[1.5, 6.5], # top-right
[6.5, 1.5], # bottom-left
[6.5, 6.5], # bottom-right
]]]
expected_keypoint_scores = [[[0.9, 0.9, 0.9, 0.9]]]
np.testing.assert_allclose(expected_keypoint_candidates, keypoint_cands)
np.testing.assert_allclose(expected_keypoint_scores, keypoint_scores)
def test_keypoint_candidate_prediction_per_keypoints(self):
keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
keypoint_heatmap_np[0, 0, 0, 0] = 1.0
......@@ -1280,7 +1351,9 @@ def get_fake_temporal_offset_params():
def build_center_net_meta_arch(build_resnet=False,
num_classes=_NUM_CLASSES,
max_box_predictions=5):
max_box_predictions=5,
apply_non_max_suppression=False,
detection_only=False):
"""Builds the CenterNet meta architecture."""
if build_resnet:
feature_extractor = (
......@@ -1299,7 +1372,31 @@ def build_center_net_meta_arch(build_resnet=False,
max_dimension=128,
pad_to_max_dimesnion=True)
if num_classes == 1:
non_max_suppression_fn = None
if apply_non_max_suppression:
post_processing_proto = post_processing_pb2.PostProcessing()
post_processing_proto.batch_non_max_suppression.iou_threshold = 1.0
post_processing_proto.batch_non_max_suppression.score_threshold = 0.6
(post_processing_proto.batch_non_max_suppression.max_total_detections
) = max_box_predictions
(post_processing_proto.batch_non_max_suppression.max_detections_per_class
) = max_box_predictions
(post_processing_proto.batch_non_max_suppression.change_coordinate_frame
) = False
non_max_suppression_fn, _ = post_processing_builder.build(
post_processing_proto)
if detection_only:
return cnma.CenterNetMetaArch(
is_training=True,
add_summaries=False,
num_classes=num_classes,
feature_extractor=feature_extractor,
image_resizer_fn=image_resizer_fn,
object_center_params=get_fake_center_params(max_box_predictions),
object_detection_params=get_fake_od_params(),
non_max_suppression_fn=non_max_suppression_fn)
elif num_classes == 1:
num_candidates_per_keypoint = 100 if max_box_predictions > 1 else 1
return cnma.CenterNetMetaArch(
is_training=True,
......@@ -1311,7 +1408,8 @@ def build_center_net_meta_arch(build_resnet=False,
object_detection_params=get_fake_od_params(),
keypoint_params_dict={
_TASK_NAME: get_fake_kp_params(num_candidates_per_keypoint)
})
},
non_max_suppression_fn=non_max_suppression_fn)
else:
return cnma.CenterNetMetaArch(
is_training=True,
......@@ -1325,7 +1423,8 @@ def build_center_net_meta_arch(build_resnet=False,
mask_params=get_fake_mask_params(),
densepose_params=get_fake_densepose_params(),
track_params=get_fake_track_params(),
temporal_offset_params=get_fake_temporal_offset_params())
temporal_offset_params=get_fake_temporal_offset_params(),
non_max_suppression_fn=non_max_suppression_fn)
def _logit(p):
......@@ -1659,7 +1758,6 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
return detections
detections = self.execute_cpu(graph_fn, [])
self.assertAllClose(detections['detection_boxes'][0, 0],
np.array([55, 46, 75, 86]) / 128.0)
self.assertAllClose(detections['detection_scores'][0],
......@@ -1732,6 +1830,49 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
detections['detection_surface_coords'][0, 0, :, :],
np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))
def test_non_max_suppression(self):
"""Tests application of NMS on CenterNet detections."""
target_class_id = 1
model = build_center_net_meta_arch(apply_non_max_suppression=True,
detection_only=True)
class_center = np.zeros((1, 32, 32, 10), dtype=np.float32)
height_width = np.zeros((1, 32, 32, 2), dtype=np.float32)
offset = np.zeros((1, 32, 32, 2), dtype=np.float32)
class_probs = np.ones(10) * _logit(0.25)
class_probs[target_class_id] = _logit(0.75)
class_center[0, 16, 16] = class_probs
height_width[0, 16, 16] = [5, 10]
offset[0, 16, 16] = [.25, .5]
class_center = tf.constant(class_center)
height_width = tf.constant(height_width)
offset = tf.constant(offset)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
cnma.BOX_SCALE: [height_width],
cnma.BOX_OFFSET: [offset],
}
def graph_fn():
detections = model.postprocess(prediction_dict,
tf.constant([[128, 128, 3]]))
return detections
detections = self.execute_cpu(graph_fn, [])
num_detections = int(detections['num_detections'])
self.assertEqual(num_detections, 1)
self.assertAllClose(detections['detection_boxes'][0, 0],
np.array([55, 46, 75, 86]) / 128.0)
self.assertAllClose(detections['detection_scores'][0][:num_detections],
[.75])
expected_multiclass_scores = [.25] * 10
expected_multiclass_scores[target_class_id] = .75
self.assertAllClose(expected_multiclass_scores,
detections['detection_multiclass_scores'][0][0])
def test_postprocess_single_class(self):
"""Test the postprocess function."""
model = build_center_net_meta_arch(num_classes=1)
......@@ -1798,6 +1939,59 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertAllEqual([1, max_detection, num_keypoints],
detections['detection_keypoint_scores'].shape)
def test_postprocess_single_instance(self):
"""Test the postprocess single instance function."""
model = build_center_net_meta_arch(num_classes=1)
num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices)
class_center = np.zeros((1, 32, 32, 1), dtype=np.float32)
keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32)
keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2)
class_probs = np.zeros(1)
class_probs[0] = _logit(0.75)
class_center[0, 16, 16] = class_probs
keypoint_regression[0, 16, 16] = [
-1., -1.,
-1., 1.,
1., -1.,
1., 1.]
keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9)
keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9)
keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9)
keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05) # Note the low score.
class_center = tf.constant(class_center)
keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32)
keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP):
[keypoint_heatmaps],
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET):
[keypoint_offsets],
cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
[keypoint_regression],
}
def graph_fn():
detections = model.postprocess_single_instance_keypoints(
prediction_dict,
tf.constant([[128, 128, 3]]),
object_center_std_dev=32,
keypoint_std_dev=[32])
return detections
detections = self.execute_cpu(graph_fn, [])
self.assertAllEqual([1, 1, num_keypoints, 2],
detections['detection_keypoints'].shape)
self.assertAllEqual([1, 1, num_keypoints],
detections['detection_keypoint_scores'].shape)
def test_get_instance_indices(self):
classes = tf.constant([[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32)
num_detections = tf.constant([1, 3], dtype=tf.int32)
......
......@@ -971,12 +971,12 @@ def _evaluate_checkpoint(estimator,
raise e
def continuous_eval(estimator,
model_dir,
input_fn,
train_steps,
name,
max_retries=0):
def continuous_eval_generator(estimator,
model_dir,
input_fn,
train_steps,
name,
max_retries=0):
"""Perform continuous evaluation on checkpoints written to a model directory.
Args:
......@@ -989,6 +989,9 @@ def continuous_eval(estimator,
max_retries: Maximum number of times to retry the evaluation on encountering
a tf.errors.InvalidArgumentError. If negative, will always retry the
evaluation.
Yields:
Pair of current step and eval_results.
"""
def terminate_eval():
......@@ -1011,6 +1014,7 @@ def continuous_eval(estimator,
# Terminate eval job when final checkpoint is reached
current_step = int(os.path.basename(ckpt).split('-')[1])
yield (current_step, eval_results)
if current_step >= train_steps:
tf.logging.info(
'Evaluation finished after training step %d' % current_step)
......@@ -1021,6 +1025,30 @@ def continuous_eval(estimator,
'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
def continuous_eval(estimator,
model_dir,
input_fn,
train_steps,
name,
max_retries=0):
"""Performs continuous evaluation on checkpoints written to a model directory.
Args:
estimator: Estimator object to use for evaluation.
model_dir: Model directory to read checkpoints for continuous evaluation.
input_fn: Input function to use for evaluation.
train_steps: Number of training steps. This is used to infer the last
checkpoint and stop evaluation loop.
name: Namescope for eval summary.
max_retries: Maximum number of times to retry the evaluation on encountering
a tf.errors.InvalidArgumentError. If negative, will always retry the
evaluation.
"""
for current_step, eval_results in continuous_eval_generator(
estimator, model_dir, input_fn, train_steps, name, max_retries):
tf.logging.info('Step %s, Eval results: %s', current_step, eval_results)
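Splitting the loop into a generator lets callers interleave their own logic with each evaluation round; `continuous_eval` above is now just one such consumer. A hedged sketch of another consumer that stops early on a metric threshold (the metric key and threshold are illustrative):

```python
for current_step, eval_results in continuous_eval_generator(
    estimator, model_dir, input_fn, train_steps, name='validation_data'):
  tf.logging.info('Step %d eval results: %s', current_step, eval_results)
  # Stop once a (hypothetical) target metric is reached.
  if eval_results.get('DetectionBoxes_Precision/mAP', 0.0) >= 0.5:
    break
```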
def populate_experiment(run_config,
hparams,
pipeline_config_path,
......
......@@ -4,6 +4,7 @@ package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for the CenterNet meta architecture from the "Objects as
// Points" paper [1]
......@@ -271,6 +272,13 @@ message CenterNet {
optional TemporalOffsetEstimation temporal_offset_task = 12;
// CenterNet does not apply conventional post processing operations such as
// non max suppression as it applies a max-pool operator on box centers.
// However, in some cases we observe the need to remove duplicate predictions
// from CenterNet. Use this optional parameter to apply traditional non max
// suppression and score thresholding.
optional PostProcessing post_processing = 24;
}
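On the Python side, the new field corresponds to a `PostProcessing` proto that `post_processing_builder.build` turns into the callable passed to `CenterNetMetaArch` as `non_max_suppression_fn`, as exercised by the unit tests elsewhere in this change. A hedged sketch with illustrative thresholds:

```python
from object_detection.builders import post_processing_builder
from object_detection.protos import post_processing_pb2

post_processing_proto = post_processing_pb2.PostProcessing()
nms = post_processing_proto.batch_non_max_suppression
nms.iou_threshold = 0.5
nms.score_threshold = 0.3
nms.max_total_detections = 100
nms.max_detections_per_class = 100
nms.change_coordinate_frame = False
non_max_suppression_fn, _ = post_processing_builder.build(post_processing_proto)
# Then: CenterNetMetaArch(..., non_max_suppression_fn=non_max_suppression_fn)
```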
message CenterNetFeatureExtractor {
......
......@@ -42,6 +42,8 @@ message Hyperparams {
// Note that if nothing below is selected, then no normalization is applied
// BatchNorm hyperparameters.
BatchNorm batch_norm = 5;
// SyncBatchNorm hyperparameters (KerasLayerHyperparams only).
BatchNorm sync_batch_norm = 9;
// GroupNorm hyperparameters. This is only supported on a subset of models.
// Note that the current implementation of group norm instantiated in
// tf.contrib.group.layers.group_norm() only supports fixed_size_resizer
......
......@@ -30,7 +30,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
// Next id: 37
// Next id: 38
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
......@@ -134,6 +134,9 @@ message InputReader {
// Whether to load track information.
optional bool load_track_id = 33 [default = false];
// Whether to load keypoint depth features.
optional bool load_keypoint_depth_features = 37 [default = false];
// Whether to use the display name when decoding examples. This is only used
// when mapping class text strings to integers.
optional bool use_display_name = 17 [default = false];
......@@ -158,12 +161,17 @@ message InputReader {
//
// The number of weights must match the number of input files configured.
//
// When set, shuffling, shuffle buffer size, and num_readers settings are
// The number of input readers per dataset is num_readers, scaled relative to
// the dataset weight.
//
// When set, shuffling and shuffle buffer size settings are
// applied individually to each dataset.
//
// Implementation follows tf.data.experimental.sample_from_datasets sampling
// strategy. Weights may take any value - only relative weights matter.
// Zero weights will result in a dataset not being sampled.
//
// Zero weights will result in a dataset not being sampled and no input
// readers spawned.
//
// Examples, assuming two input files configured:
//
......
......@@ -254,7 +254,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
"""
for image_id in image_ids:
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
logging.warning('Image with id %s already added.', image_id)
self._evaluation.merge_internal_state(state_tuple)
......@@ -321,7 +321,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
raise error if instance masks are not in groundtruth dictionary.
"""
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
logging.warning('Image with id %s already added.', image_id)
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
......@@ -729,7 +729,7 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
ValueError: On adding groundtruth for an image more than once.
"""
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
logging.warning('Image with id %s already added.', image_id)
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
......
......@@ -524,30 +524,6 @@ class PascalEvaluationTest(tf.test.TestCase):
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
def test_value_error_on_duplicate_images(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
# Add groundtruth
pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
categories)
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
with self.assertRaises(ValueError):
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
class WeightedPascalEvaluationTest(tf.test.TestCase):
......@@ -659,28 +635,6 @@ class WeightedPascalEvaluationTest(tf.test.TestCase):
self.wp_eval.clear()
self.assertFalse(self.wp_eval._image_ids)
def test_value_error_on_duplicate_images(self):
# Add groundtruth
self.wp_eval = (
object_detection_evaluation.WeightedPascalDetectionEvaluator(
self.categories))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
with self.assertRaises(ValueError):
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
class PrecisionAtRecallEvaluationTest(tf.test.TestCase):
......@@ -807,31 +761,6 @@ class PrecisionAtRecallEvaluationTest(tf.test.TestCase):
self.wp_eval.clear()
self.assertFalse(self.wp_eval._image_ids)
def test_value_error_on_duplicate_images(self):
# Add groundtruth
self.wp_eval = (
object_detection_evaluation.PrecisionAtRecallDetectionEvaluator(
self.categories, recall_lower_bound=0.0, recall_upper_bound=0.5))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1
})
with self.assertRaises(ValueError):
self.wp_eval.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1
})
class ObjectDetectionEvaluationTest(tf.test.TestCase):
......
"""Setup script for object_detection."""
from setuptools import find_packages
from setuptools import setup
REQUIRED_PACKAGES = ['Pillow>=1.0', 'Matplotlib>=2.1', 'Cython>=0.28.1']
setup(
name='object_detection',
version='0.1',
install_requires=REQUIRED_PACKAGES,
include_package_data=True,
packages=[p for p in find_packages() if p.startswith('object_detection')],
description='Tensorflow Object Detection Library',
)
......@@ -65,15 +65,6 @@ You will need to register in order to download the data. Download the following
* leftImg8bit_sequence_trainvaltest.zip
* camera_trainvaltest.zip
### Download Bike dataset (17GB) (optional)
```shell
mkdir -p ~/vid2depth/bike-uncompressed
cd ~/vid2depth/bike-uncompressed
wget https://storage.googleapis.com/brain-robotics-data/bike/BikeVideoDataset.tar
tar xvf BikeVideoDataset.tar
```
## 3. Inference
### Download trained model
......@@ -122,18 +113,6 @@ python dataset/gen_data.py \
--seq_length 3
```
### Prepare Bike training sequences (optional)
```shell
# Prepare training sequences.
cd tensorflow/models/research/vid2depth
python dataset/gen_data.py \
--dataset_name bike \
--dataset_dir ~/vid2depth/bike-uncompressed \
--data_dir ~/vid2depth/data/bike \
--seq_length 3
```
### Compile the ICP op (work in progress)
The ICP op depends on multiple software packages (TensorFlow, Point Cloud
......