Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
......@@ -20,7 +20,7 @@ import tensorflow as tf
from object_detection.core import matcher
class AnchorMatcherTest(tf.test.TestCase):
class MatchTest(tf.test.TestCase):
def test_get_correct_matched_columnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
......@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase):
self.assertAllEqual(all_indices_sorted,
np.arange(num_matches, dtype=np.int32))
def test_scalar_gather_based_on_match(self):
  """Gathers scalar values by match index, filling the special slots.

  The expected output shows that entries of -1 in the match results receive
  `unmatched_value` and entries of -2 receive `ignored_value`.
  """
  match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
  input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
  expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
  match = matcher.Match(match_results)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=100.,
                                                ignored_value=200.)
  # assertEqual is used because the assertEquals alias is deprecated.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session():
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match(self):
  """Gathers whole rows by match index, with vector-valued fill values.

  Row 1 of the input is selected for the matched entry; the unmatched (-1)
  and ignored (-2) entries are expected to come back as all-zero rows.
  """
  match_results = tf.constant([1, -1, -2])
  input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
                             dtype=tf.float32)
  expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
  match = matcher.Match(match_results)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=tf.zeros(4),
                                                ignored_value=tf.zeros(4))
  # assertEqual is used because the assertEquals alias is deprecated.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session():
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
  """Same row gather as the plain test, with `use_matmul_gather=True`.

  Besides checking the gathered values, the test asserts that no operation
  in the session graph is named 'Gather'.
  """
  match_results = tf.constant([1, -1, -2])
  input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
                             dtype=tf.float32)
  expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
  match = matcher.Match(match_results, use_matmul_gather=True)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=tf.zeros(4),
                                                ignored_value=tf.zeros(4))
  # assertEqual is used because the assertEquals alias is deprecated.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session() as sess:
    # Compare names by value: `is not` against a string literal tests object
    # identity, which does not reliably reflect whether the names are equal.
    self.assertTrue(
        all(op.name != 'Gather' for op in sess.graph.get_operations()))
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
# Run the test cases through TensorFlow's test runner when this file is
# executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
and `postprocess` should be reentrant.
The `preprocess` method runs `image_resizer_fn` that returns resized_images and
`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
true_image_shapes indicate the slices that contain the image without padding.
This is useful for padding images to be a fixed size for batching.
The `postprocess` method uses the true image shapes to clip predictions that lie
outside of images.
By default, DetectionModels produce bounding box detections; however, we support
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
......@@ -106,12 +117,12 @@ class DetectionModel(object):
This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image.
It is also responsible for any resizing that might be necessary as images
are assumed to arrive in arbitrary sizes. While this function could
conceivably be part of the predict method (below), it is often convenient
to keep these separate --- for example, we may want to preprocess on one
device, place onto a queue, and let another device (e.g., the GPU) handle
prediction.
It is also responsible for any resizing or padding that might be necessary
as images are assumed to arrive in arbitrary sizes. While this function
could conceivably be part of the predict method (below), it is often
convenient to keep these separate --- for example, we may want to preprocess
on one device, place onto a queue, and let another device (e.g., the GPU)
handle prediction.
A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor
......@@ -134,11 +145,15 @@ class DetectionModel(object):
Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
"""
pass
@abstractmethod
def predict(self, preprocessed_inputs):
def predict(self, preprocessed_inputs, true_image_shapes):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
......@@ -146,6 +161,10 @@ class DetectionModel(object):
Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
prediction_dict: a dictionary holding prediction tensors to be
......@@ -154,7 +173,7 @@ class DetectionModel(object):
pass
@abstractmethod
def postprocess(self, prediction_dict, **params):
def postprocess(self, prediction_dict, true_image_shapes, **params):
"""Convert predicted output tensors to final detections.
Outputs adhere to the following conventions:
......@@ -172,6 +191,10 @@ class DetectionModel(object):
Args:
prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of
DetectionModel.
......@@ -190,7 +213,7 @@ class DetectionModel(object):
pass
@abstractmethod
def loss(self, prediction_dict):
def loss(self, prediction_dict, true_image_shapes):
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
......@@ -198,6 +221,10 @@ class DetectionModel(object):
Args:
prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
......@@ -209,7 +236,8 @@ class DetectionModel(object):
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None):
groundtruth_keypoints_list=None,
groundtruth_weights_list=None):
"""Provide groundtruth tensors.
Args:
......@@ -230,10 +258,15 @@ class DetectionModel(object):
shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
fields.BoxListFields.classes] = groundtruth_classes_list
if groundtruth_weights_list:
self._groundtruth_lists[fields.BoxListFields.
weights] = groundtruth_weights_list
if groundtruth_masks_list:
self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list
......
......@@ -20,6 +20,7 @@ import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def multiclass_non_max_suppression(boxes,
......@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes,
clip_window=None,
change_coordinate_frame=False,
masks=None,
boundaries=None,
additional_fields=None,
scope=None):
"""Multi-class version of non maximum suppression.
......@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes,
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class.
boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
tensor containing box boundaries. `q` can be either number of classes or 1
depending on whether a separate boundary is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be
......@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes,
per_class_boxes_list = tf.unstack(boxes, axis=1)
if masks is not None:
per_class_masks_list = tf.unstack(masks, axis=1)
if boundaries is not None:
per_class_boundaries_list = tf.unstack(boundaries, axis=1)
boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
else [0] * num_classes)
for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
......@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes,
per_class_masks = per_class_masks_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
per_class_masks)
if boundaries is not None:
per_class_boundaries = per_class_boundaries_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
per_class_boundaries)
if additional_fields is not None:
for key, tensor in additional_fields.items():
boxlist_and_class_scores.add_field(key, tensor)
......@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes,
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
representing the window to clip boxes to before performing non-max
suppression.
clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
of the form [y_min, x_min, y_max, x_max] representing the window to clip
boxes to before performing non-max suppression. This argument can also be
a tensor of shape [4] in which case, the same clip window is applied to
all images in the batch. If clip_window is None, all boxes are used to
perform non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
is provided)
......@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes,
if q != 1 and q != num_classes:
raise ValueError('third dimension of boxes must be either 1 or equal '
'to the third dimension of scores')
if change_coordinate_frame and clip_window is None:
  # Changing the coordinate frame is done relative to clip_window, so one
  # must be supplied. The trailing space keeps the two adjacent string
  # literals from running together in the message.
  raise ValueError('if change_coordinate_frame is True, then a clip_window '
                   'must be specified.')
original_masks = masks
original_additional_fields = additional_fields
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
......@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes,
masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
masks = tf.zeros(masks_shape)
if clip_window is None:
clip_window = tf.stack([
tf.reduce_min(boxes[:, :, :, 0]),
tf.reduce_min(boxes[:, :, :, 1]),
tf.reduce_max(boxes[:, :, :, 2]),
tf.reduce_max(boxes[:, :, :, 3])
])
if clip_window.shape.ndims == 1:
clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
if additional_fields is None:
additional_fields = {}
......@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
per_image_clip_window - A 1D float32 tensor of the form
[ymin, xmin, ymax, xmax] representing the window to clip the boxes
to.
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
......@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_boxes = args[0]
per_image_scores = args[1]
per_image_masks = args[2]
per_image_clip_window = args[3]
per_image_additional_fields = {
key: value
for key, value in zip(additional_fields, args[3:-1])
for key, value in zip(additional_fields, args[4:-1])
}
per_image_num_valid_boxes = args[-1]
per_image_boxes = tf.reshape(
......@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh,
max_size_per_class,
max_total_size,
clip_window=clip_window,
clip_window=per_image_clip_window,
change_coordinate_frame=change_coordinate_frame,
masks=per_image_masks,
additional_fields=per_image_additional_fields)
......@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes,
num_additional_fields = len(additional_fields)
num_nmsed_outputs = 4 + num_additional_fields
batch_outputs = tf.map_fn(
batch_outputs = shape_utils.static_or_dynamic_map_fn(
_single_image_nms_fn,
elems=([boxes, scores, masks] + list(additional_fields.values()) +
[num_valid_boxes]),
elems=([boxes, scores, masks, clip_window] +
list(additional_fields.values()) + [num_valid_boxes]),
dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
parallel_iterations=parallel_iterations)
......
......@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(nmsed_classes, exp_nms_classes)
self.assertAllClose(num_detections, [2, 3])
def test_batch_multiclass_nms_with_per_batch_clip_window(self):
  """Runs batched NMS with one rank-1 clip window given for the whole batch."""
  boxes = tf.constant(
      [[[[0, 0, 1, 1], [0, 0, 4, 5]],
        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
        [[0, 10, 1, 11], [0, 10, 1, 11]]],
       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
        [[0, 100, 1, 101], [0, 100, 1, 101]],
        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
      tf.float32)
  scores = tf.constant(
      [[[.9, 0.01], [.75, 0.05], [.6, 0.01], [.95, 0]],
       [[.5, 0.01], [.3, 0.01], [.01, .85], [.01, .5]]])
  # A single window; it is passed as-is for the two-image batch.
  shared_window = tf.constant([0., 0., 200., 200.])
  score_threshold = 0.1
  iou_threshold = .5
  max_boxes = 4

  expected_boxes = np.array([[[0, 10, 1, 11],
                              [0, 0, 1, 1],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0]],
                             [[0, 10.1, 1, 11.1],
                              [0, 100, 1, 101],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0]]])
  expected_scores = np.array([[.95, .9, 0, 0],
                              [.5, .3, 0, 0]])
  expected_classes = np.array([[0, 0, 0, 0],
                               [0, 0, 0, 0]])

  nms_outputs = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_threshold, iou_threshold,
      max_size_per_class=max_boxes, max_total_size=max_boxes,
      clip_window=shared_window)
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections) = nms_outputs

  # Neither masks nor additional fields were supplied.
  self.assertIsNone(nmsed_masks)
  self.assertIsNone(nmsed_additional_fields)

  # Check static shapes
  for tensor, expected in ((nmsed_boxes, expected_boxes),
                           (nmsed_scores, expected_scores),
                           (nmsed_classes, expected_classes)):
    self.assertAllEqual(tensor.shape.as_list(), expected.shape)
  self.assertEqual(num_detections.shape.as_list(), [2])

  with self.test_session() as sess:
    boxes_out, scores_out, classes_out, num_detections_out = sess.run(
        [nmsed_boxes, nmsed_scores, nmsed_classes, num_detections])
    self.assertAllClose(boxes_out, expected_boxes)
    self.assertAllClose(scores_out, expected_scores)
    self.assertAllClose(classes_out, expected_classes)
    self.assertAllClose(num_detections_out, [2, 2])
def test_batch_multiclass_nms_with_per_image_clip_window(self):
  """Runs batched NMS with a separate clip window row for each image."""
  boxes = tf.constant(
      [[[[0, 0, 1, 1], [0, 0, 4, 5]],
        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
        [[0, 10, 1, 11], [0, 10, 1, 11]]],
       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
        [[0, 100, 1, 101], [0, 100, 1, 101]],
        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
      tf.float32)
  scores = tf.constant(
      [[[.9, 0.01], [.75, 0.05], [.6, 0.01], [.95, 0]],
       [[.5, 0.01], [.3, 0.01], [.01, .85], [.01, .5]]])
  # One window row per image in the two-image batch.
  per_image_windows = tf.constant([[0., 0., 5., 5.],
                                   [0., 0., 200., 200.]])
  score_threshold = 0.1
  iou_threshold = .5
  max_boxes = 4

  expected_boxes = np.array([[[0, 0, 1, 1],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0]],
                             [[0, 10.1, 1, 11.1],
                              [0, 100, 1, 101],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0]]])
  expected_scores = np.array([[.9, 0., 0., 0.],
                              [.5, .3, 0, 0]])
  expected_classes = np.array([[0, 0, 0, 0],
                               [0, 0, 0, 0]])

  nms_outputs = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_threshold, iou_threshold,
      max_size_per_class=max_boxes, max_total_size=max_boxes,
      clip_window=per_image_windows)
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections) = nms_outputs

  # Neither masks nor additional fields were supplied.
  self.assertIsNone(nmsed_masks)
  self.assertIsNone(nmsed_additional_fields)

  # Check static shapes
  for tensor, expected in ((nmsed_boxes, expected_boxes),
                           (nmsed_scores, expected_scores),
                           (nmsed_classes, expected_classes)):
    self.assertAllEqual(tensor.shape.as_list(), expected.shape)
  self.assertEqual(num_detections.shape.as_list(), [2])

  with self.test_session() as sess:
    boxes_out, scores_out, classes_out, num_detections_out = sess.run(
        [nmsed_boxes, nmsed_scores, nmsed_classes, num_detections])
    self.assertAllClose(boxes_out, expected_boxes)
    self.assertAllClose(scores_out, expected_scores)
    self.assertAllClose(classes_out, expected_classes)
    self.assertAllClose(num_detections_out, [1, 2])
def test_batch_multiclass_nms_with_masks(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
......
......@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
To preprocess multiple images with the same operations in cases where
nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
object can be passed into the preprocess function or individual operations.
All nondeterministic operations except random_jitter_boxes support caching.
E.g.
Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
Let preprocess_options contain nondeterministic operation(s) excluding
random_jitter_boxes.
cache1 = preprocessor_cache.PreprocessorCache()
cache2 = preprocessor_cache.PreprocessorCache()
a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
e = preprocess(tensor_dict5, preprocess_options)
Then corresponding tensors of object pairs (a,b) and (c,d)
are guaranteed to be equal element-wise, but the equality of any other object
pair cannot be determined.
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
......@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import functools
import inspect
import sys
import tensorflow as tf
......@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def _apply_with_random_selector(x, func, num_cases):
def _apply_with_random_selector(x,
func,
num_cases,
preprocess_vars_cache=None,
key=''):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
generator_func = functools.partial(
tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
rand_sel = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.SELECTOR,
preprocess_vars_cache, key)
# Pass the real x only to one of the func calls.
return control_flow_ops.merge([func(
control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
for case in range(num_cases)])[0]
def _apply_with_random_selector_tuples(x, func, num_cases):
def _apply_with_random_selector_tuples(x,
func,
num_cases,
preprocess_vars_cache=None,
key=''):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args:
x: A tuple of input tensors.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
num_inputs = len(x)
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
generator_func = functools.partial(
tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
rand_sel = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES,
preprocess_vars_cache, key)
# Pass the real x only to one of the func calls.
tuples = [list() for t in x]
for case in range(num_cases):
new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
......@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases):
return tuple(tuples)
def _get_or_create_preprocess_rand_vars(generator_func,
                                        function_id,
                                        preprocess_vars_cache,
                                        key=''):
  """Fetches a random variable from the cache, generating it on a miss.

  Without a cache, generator_func is simply called and its result returned.
  With a cache, the entry stored under (function_id, key) is returned when it
  is not None; otherwise a new value is generated, recorded in the cache under
  that same (function_id, key) pair, and returned.

  Args:
    generator_func: A 0-argument function that generates a tensor.
    function_id: identifier for the preprocessing function used.
    preprocess_vars_cache: PreprocessorCache object that records previously
                           performed augmentations. Updated in-place. If this
                           function is called multiple times with the same
                           non-null cache, it will perform deterministically.
    key: identifier for the variable stored.

  Returns:
    The cached tensor if one exists, otherwise the newly generated tensor.
  """
  # No cache supplied: nothing to look up or record.
  if preprocess_vars_cache is None:
    return generator_func()

  cached_var = preprocess_vars_cache.get(function_id, key)
  if cached_var is not None:
    return cached_var

  # Cache miss: generate once and remember the result for later calls.
  new_var = generator_func()
  preprocess_vars_cache.update(function_id, key, new_var)
  return new_var
def _random_integer(minval, maxval, seed):
"""Returns a random 0-D tensor between minval and maxval.
......@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed):
[], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
# TODO: This method is needed because the current
# tf.image.rgb_to_grayscale method does not support quantization. Replace with
# tf.image.rgb_to_grayscale after quantization support is added.
def _rgb_to_grayscale(images, name=None):
  """Converts one or more RGB images to single-channel grayscale.

  The result keeps the `DType` and rank of `images`; only the last dimension
  changes, going from size 3 to size 1 and holding each pixel's grayscale
  value.

  Args:
    images: The RGB tensor to convert. Last dimension must have size 3 and
      should contain RGB values.
    name: A name for the operation (optional).

  Returns:
    The converted grayscale image(s).
  """
  with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name:
    images = tf.convert_to_tensor(images, name='images')
    # Remember the original dtype so we can convert back at the end.
    input_dtype = images.dtype
    float_images = tf.image.convert_image_dtype(images, tf.float32)

    # Per-channel weights for the R, G and B values; reference:
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    luma_weights = [0.2989, 0.5870, 0.1140]
    # Index of the last (channel) axis, wrapped in a 1-element tensor.
    channel_axis = tf.expand_dims(tf.rank(images) - 1, 0)
    weighted_channels = float_images * luma_weights
    gray_float = tf.reduce_sum(weighted_channels, channel_axis, keepdims=True)
    # Restore the static shape: same leading dims, one output channel.
    gray_shape = images.get_shape()[:-1].concatenate([1])
    gray_float.set_shape(gray_shape)
    return tf.image.convert_image_dtype(gray_float, input_dtype, name=name)
def normalize_image(image, original_minval, original_maxval, target_minval,
target_maxval):
"""Normalizes pixel values in the image.
......@@ -312,7 +435,8 @@ def random_horizontal_flip(image,
masks=None,
keypoints=None,
keypoint_flip_permutation=None,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections horizontally.
The probability of flipping the image is 50%.
......@@ -333,6 +457,10 @@ def random_horizontal_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
......@@ -364,7 +492,12 @@ def random_horizontal_flip(image,
with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
result = []
# random variable defining whether to do flip or not
do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP,
preprocess_vars_cache)
do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
# flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
......@@ -399,7 +532,8 @@ def random_vertical_flip(image,
masks=None,
keypoints=None,
keypoint_flip_permutation=None,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections vertically.
The probability of flipping the image is 50%.
......@@ -420,6 +554,10 @@ def random_vertical_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
......@@ -451,7 +589,11 @@ def random_vertical_flip(image,
with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
result = []
# random variable defining whether to do flip or not
do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP,
preprocess_vars_cache)
do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
# flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
......@@ -485,7 +627,8 @@ def random_rotation90(image,
boxes=None,
masks=None,
keypoints=None,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly rotates the image and detections 90 degrees counter-clockwise.
The probability of rotating the image is 50%. This can be combined with
......@@ -507,6 +650,10 @@ def random_rotation90(image,
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
......@@ -532,7 +679,11 @@ def random_rotation90(image,
result = []
# random variable defining whether to rotate by 90 degrees or not
do_a_rot90_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_rot90_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
preprocess_vars_cache)
do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
# flip image
image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
......@@ -562,7 +713,11 @@ def random_rotation90(image,
return tuple(result)
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
def random_pixel_value_scale(image,
minval=0.9,
maxval=1.1,
seed=None,
preprocess_vars_cache=None):
"""Scales each value in the pixels of the image.
This function scales each pixel independent of the other ones.
......@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomPixelValueScale', values=[image]):
color_coef = tf.random_uniform(
tf.shape(image),
minval=minval,
maxval=maxval,
dtype=tf.float32,
seed=seed)
generator_func = functools.partial(
tf.random_uniform, tf.shape(image),
minval=minval, maxval=maxval,
dtype=tf.float32, seed=seed)
color_coef = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE,
preprocess_vars_cache)
image = tf.multiply(image, color_coef)
image = tf.clip_by_value(image, 0.0, 1.0)
......@@ -596,7 +758,8 @@ def random_image_scale(image,
masks=None,
min_scale_ratio=0.5,
max_scale_ratio=2.0,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Scales the image size.
Args:
......@@ -607,6 +770,10 @@ def random_image_scale(image,
min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -618,10 +785,14 @@ def random_image_scale(image,
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
size_coef = tf.random_uniform([],
minval=min_scale_ratio,
maxval=max_scale_ratio,
dtype=tf.float32, seed=seed)
generator_func = functools.partial(
tf.random_uniform, [],
minval=min_scale_ratio, maxval=max_scale_ratio,
dtype=tf.float32, seed=seed)
size_coef = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE,
preprocess_vars_cache)
image_newysize = tf.to_int32(
tf.multiply(tf.to_float(image_height), size_coef))
image_newxsize = tf.to_int32(
......@@ -636,7 +807,10 @@ def random_image_scale(image,
return tuple(result)
def random_rgb_to_gray(image, probability=0.1, seed=None):
def random_rgb_to_gray(image,
probability=0.1,
seed=None,
preprocess_vars_cache=None):
"""Changes the image from RGB to Grayscale with the given probability.
Args:
......@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1].
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
def _image_to_gray(image):
image_gray1 = tf.image.rgb_to_grayscale(image)
image_gray1 = _rgb_to_grayscale(image)
image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
return image_gray3
with tf.name_scope('RandomRGBtoGray', values=[image]):
# random variable defining whether to do flip or not
do_gray_random = tf.random_uniform([], seed=seed)
# random variable defining whether to change to grayscale or not
generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_gray_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY,
preprocess_vars_cache)
image = tf.cond(
tf.greater(do_gray_random, probability), lambda: image,
......@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
return image
def random_adjust_brightness(image, max_delta=0.2):
def random_adjust_brightness(image,
max_delta=0.2,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1.
......@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1).
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
boxes: boxes which is the same shape as input boxes.
"""
with tf.name_scope('RandomAdjustBrightness', values=[image]):
image = tf.image.random_brightness(image, max_delta)
generator_func = functools.partial(tf.random_uniform, [],
-max_delta, max_delta, seed=seed)
delta = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS,
preprocess_vars_cache)
image = tf.image.adjust_brightness(image, delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
def random_adjust_contrast(image,
min_delta=0.8,
max_delta=1.25,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1.
......@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustContrast', values=[image]):
image = tf.image.random_contrast(image, min_delta, max_delta)
generator_func = functools.partial(tf.random_uniform, [],
min_delta, max_delta, seed=seed)
contrast_factor = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST,
preprocess_vars_cache)
image = tf.image.adjust_contrast(image, contrast_factor)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_hue(image, max_delta=0.02):
def random_adjust_hue(image,
max_delta=0.02,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts hue.
Makes sure the output image is still between 0 and 1.
......@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between -max_delta and max_delta.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustHue', values=[image]):
image = tf.image.random_hue(image, max_delta)
generator_func = functools.partial(tf.random_uniform, [],
-max_delta, max_delta, seed=seed)
delta = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE,
preprocess_vars_cache)
image = tf.image.adjust_hue(image, delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
def random_adjust_saturation(image,
min_delta=0.8,
max_delta=1.25,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1.
......@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustSaturation', values=[image]):
image = tf.image.random_saturation(image, min_delta, max_delta)
generator_func = functools.partial(tf.random_uniform, [],
min_delta, max_delta, seed=seed)
saturation_factor = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_SATURATION,
preprocess_vars_cache)
image = tf.image.adjust_saturation(image, saturation_factor)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_distort_color(image, color_ordering=0):
def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
"""Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast
......@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1).
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
......@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0):
"""
with tf.name_scope('RandomDistortColor', values=[image]):
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = random_adjust_brightness(
image, max_delta=32. / 255.,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_saturation(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_hue(
image, max_delta=0.2,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_contrast(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
elif color_ordering == 1:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = random_adjust_brightness(
image, max_delta=32. / 255.,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_contrast(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_saturation(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_hue(
image, max_delta=0.2,
preprocess_vars_cache=preprocess_vars_cache)
else:
raise ValueError('color_ordering must be in {0, 1}')
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image
......@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3):
overlap_thresh=0.3,
preprocess_vars_cache=None):
"""Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
......@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image,
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image,
tf.clip_by_value(
boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
generator_func = functools.partial(
tf.image.sample_distorted_bounding_box,
image_shape,
bounding_boxes=boxes_expanded,
min_object_covered=min_object_covered,
......@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image,
max_attempts=100,
use_image_if_no_bounding_boxes=True)
# for ssd cropping, each value of min_object_covered has its own
# cached random variable
sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE,
preprocess_vars_cache, key=min_object_covered)
im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
new_image = tf.slice(image, im_box_begin, im_box_size)
......@@ -984,7 +1254,8 @@ def random_crop_image(image,
area_range=(0.1, 1.0),
overlap_thresh=0.3,
random_coef=0.0,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly crops the image.
Given the input image and its bounding boxes, this op randomly
......@@ -1029,6 +1300,10 @@ def random_crop_image(image,
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: Image shape will be [new_height, new_width, channels].
......@@ -1056,13 +1331,17 @@ def random_crop_image(image,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
overlap_thresh=overlap_thresh)
overlap_thresh=overlap_thresh,
preprocess_vars_cache=preprocess_vars_cache)
# avoids tf.cond to make faster RCNN training on borg. See b/140057645.
if random_coef < sys.float_info.min:
result = strict_random_crop_image_fn()
else:
do_a_crop_random = tf.random_uniform([], seed=seed)
generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_crop_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE,
preprocess_vars_cache)
do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
outputs = [image, boxes, labels]
......@@ -1084,7 +1363,8 @@ def random_pad_image(image,
min_image_size=None,
max_image_size=None,
pad_color=None,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly pads the image.
This function randomly pads the image with zeros. The final size of the
......@@ -1110,8 +1390,11 @@ def random_pad_image(image,
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input
image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: Image shape will be [new_height, new_width, channels].
......@@ -1155,6 +1438,12 @@ def random_pad_image(image,
lambda: _random_integer(0, target_width - image_width, seed),
lambda: tf.constant(0, dtype=tf.int32))
gen_func = lambda: (target_height, target_width, offset_height, offset_width)
params = _get_or_create_preprocess_rand_vars(
gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
preprocess_vars_cache)
target_height, target_width, offset_height, offset_width = params
new_image = tf.image.pad_to_bounding_box(
image,
offset_height=offset_height,
......@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image,
min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0),
pad_color=None,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops
......@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image,
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
padded_image: padded image.
......@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image,
area_range=area_range,
overlap_thresh=overlap_thresh,
random_coef=random_coef,
seed=seed)
seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
cropped_image, cropped_boxes, cropped_labels = result[:3]
......@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image,
min_image_size=min_image_size,
max_image_size=max_image_size,
pad_color=pad_color,
seed=seed)
seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
......@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image,
keypoints=None,
aspect_ratio=1.0,
overlap_thresh=0.3,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly crops an image to the specified aspect ratio.
Randomly crops a portion of the image such that the crop is of the
......@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image,
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image,
# offset_height is randomly chosen from [0, orig_height - target_height + 1)
offset_height = _random_integer(0, orig_height - target_height + 1, seed)
offset_width = _random_integer(0, orig_width - target_width + 1, seed)
generator_func = lambda: (offset_height, offset_width)
offset_height, offset_width = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
preprocess_vars_cache)
new_image = tf.image.crop_to_bounding_box(
image, offset_height, offset_width, target_height, target_width)
......@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image,
aspect_ratio=1.0,
min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0),
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Randomly zero pads an image to the specified aspect ratio.
Pads the image so that the resulting image will have the specified aspect
......@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image,
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image,
min_scale = tf.maximum(min_height / target_height, min_width / target_width)
max_scale = tf.minimum(max_height / target_height, max_width / target_width)
scale = tf.random_uniform([], min_scale, max_scale, seed=seed)
generator_func = functools.partial(tf.random_uniform, [],
min_scale, max_scale, seed=seed)
scale = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO,
preprocess_vars_cache)
target_height = scale * target_height
target_width = scale * target_width
......@@ -1549,7 +1868,8 @@ def random_black_patches(image,
max_black_patches=10,
probability=0.5,
size_to_image_ratio=0.1,
random_seed=None):
random_seed=None,
preprocess_vars_cache=None):
"""Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size
......@@ -1566,15 +1886,20 @@ def random_black_patches(image,
box_size = size_to_image_ratio *
min(image_width, image_height)
random_seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image
"""
def add_black_patch_to_image(image):
def add_black_patch_to_image(image, idx):
"""Function for adding one patch to the image.
Args:
image: image
idx: counter for number of patches that could have been added
Returns:
image with a randomly added black box
......@@ -1586,10 +1911,19 @@ def random_black_patches(image,
tf.multiply(
tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
size_to_image_ratio))
normalized_y_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
normalized_x_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
generator_func = functools.partial(tf.random_uniform, [], minval=0.0,
maxval=(1.0 - size_to_image_ratio),
seed=random_seed)
normalized_y_min = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
preprocess_vars_cache, key=str(idx) + 'y')
normalized_x_min = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
preprocess_vars_cache, key=str(idx) + 'x')
y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
......@@ -1599,13 +1933,17 @@ def random_black_patches(image,
return image
with tf.name_scope('RandomBlackPatchInImage', values=[image]):
for _ in range(max_black_patches):
random_prob = tf.random_uniform(
[], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed)
for idx in range(max_black_patches):
generator_func = functools.partial(tf.random_uniform, [],
minval=0.0, maxval=1.0,
dtype=tf.float32, seed=random_seed)
random_prob = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.BLACK_PATCHES,
preprocess_vars_cache, key=idx)
image = tf.cond(
tf.greater(random_prob, probability), lambda: image,
lambda: add_black_patch_to_image(image))
functools.partial(add_black_patch_to_image, image=image, idx=idx))
return image
......@@ -1623,12 +1961,16 @@ def image_to_float(image):
return image
def random_resize_method(image, target_size):
def random_resize_method(image, target_size, preprocess_vars_cache=None):
"""Uses a random resize method to resize the image to target size.
Args:
image: a rank 3 tensor.
target_size: a list of [target_height, target_width]
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
resized image.
......@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size):
resized_image = _apply_with_random_selector(
image,
lambda x, method: tf.image.resize_images(x, target_size, method),
num_cases=4)
num_cases=4,
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD)
return resized_image
......@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
image_shape = image.get_shape().as_list()
orig_height = image_shape[0]
orig_width = image_shape[1]
num_channels = image_shape[2]
orig_min_dim = min(orig_height, orig_width)
# Calculates the larger of the possible sizes
large_scale_factor = min_dimension / float(orig_min_dim)
......@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
new_size = small_size
else:
new_size = large_size
return tf.constant(new_size)
return tf.constant(new_size + [num_channels])
def _compute_new_dynamic_size(image, min_dimension, max_dimension):
......@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1])
num_channels = image_shape[2]
orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32)
......@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
lambda: small_size, lambda: large_size)
else:
new_size = large_size
return new_size
return tf.stack(tf.unstack(new_size) + [num_channels])
def resize_to_range(image,
......@@ -1719,7 +2065,8 @@ def resize_to_range(image,
min_dimension=None,
max_dimension=None,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False):
align_corners=False,
pad_to_max_dimension=False):
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
......@@ -1740,15 +2087,22 @@ def resize_to_range(image,
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
Returns:
A 3D tensor of shape [new_height, new_width, channels],
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
max(new_height, new_width) == max_dimension.
If masks is not None, also outputs masks:
A 3D tensor of shape [num_instances, new_height, new_width]
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A 3D tensor of shape [new_height, new_width, channels],
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
max(new_height, new_width) == max_dimension.
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width].
resized_image_shape: A 1D tensor of shape [3] containing shape of the
resized image.
Raises:
ValueError: if the image is not a 3D tensor.
......@@ -1762,16 +2116,27 @@ def resize_to_range(image,
else:
new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
new_image = tf.image.resize_images(
image, new_size, method=method, align_corners=align_corners)
image, new_size[:-1], method=method, align_corners=align_corners)
result = new_image
if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box(
new_image, 0, 0, max_dimension, max_dimension)
result = [new_image]
if masks is not None:
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(
new_masks, new_size, align_corners=align_corners)
new_masks = tf.image.resize_images(
new_masks,
new_size[:-1],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=align_corners)
new_masks = tf.squeeze(new_masks, 3)
result = [new_image, new_masks]
if pad_to_max_dimension:
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, max_dimension, max_dimension)
result.append(new_masks)
result.append(new_size)
return result
......@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
min_dimension: minimum image dimension.
Returns:
a tuple containing the following:
Resized image. A tensor of size [new_height, new_width, channels].
(optional) Resized masks. A tensor of
size [num_instances, new_height, new_width].
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
Raises:
ValueError: if the image is not a 3D tensor.
......@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1]
num_channels = tf.shape(image)[2]
min_image_dimension = tf.minimum(image_height, image_width)
min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
......@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
tf.expand_dims(image, axis=0),
size=[target_height, target_width],
align_corners=True)
result = tf.squeeze(image, axis=0)
result = [tf.squeeze(image, axis=0)]
if masks is not None:
masks = tf.image.resize_nearest_neighbor(
tf.expand_dims(masks, axis=3),
size=[target_height, target_width],
align_corners=True)
result = (result, tf.squeeze(masks, axis=3))
result.append(tf.squeeze(masks, axis=3))
result.append(tf.stack([target_height, target_width, num_channels]))
return result
......@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
return tuple(result)
# TODO: Investigate if instead the function should return None if
# masks is None.
# pylint: disable=g-doc-return-or-yield
def resize_image(image,
masks=None,
......@@ -1861,7 +2235,28 @@ def resize_image(image,
new_width=1024,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False):
"""See `tf.image.resize_images` for detailed doc."""
"""Resizes images to the given height and width.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
new_height: (optional) (scalar) desired height of the image.
new_width: (optional) (scalar) desired width of the image.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
"""
with tf.name_scope(
'ResizeImage',
values=[image, new_height, new_width, method, align_corners]):
......@@ -1869,7 +2264,8 @@ def resize_image(image,
image, [new_height, new_width],
method=method,
align_corners=align_corners)
result = new_image
image_shape = shape_utils.combined_static_and_dynamic_shape(image)
result = [new_image]
if masks is not None:
num_instances = tf.shape(masks)[0]
new_size = tf.constant([new_height, new_width], dtype=tf.int32)
......@@ -1886,8 +2282,9 @@ def resize_image(image,
masks = tf.cond(num_instances > 0, resize_masks_branch,
reshape_masks_branch)
result = [new_image, masks]
result.append(masks)
result.append(tf.stack([new_height, new_width, image_shape[2]]))
return result
......@@ -1946,7 +2343,7 @@ def rgb_to_gray(image):
Returns:
image: A single channel grayscale image -> [height, width, 1].
"""
return tf.image.rgb_to_grayscale(image)
return _rgb_to_grayscale(image)
def ssd_random_crop(image,
......@@ -1960,7 +2357,8 @@ def ssd_random_crop(image,
area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
......@@ -1994,6 +2392,10 @@ def ssd_random_crop(image,
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -2046,14 +2448,17 @@ def ssd_random_crop(image,
area_range=area_range[index],
overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index],
seed=seed)
seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
result = _apply_with_random_selector_tuples(
tuple(
t for t in (image, boxes, labels, label_scores, masks, keypoints)
if t is not None),
random_crop_selector,
num_cases=len(min_object_covered))
num_cases=len(min_object_covered),
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID)
return result
......@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio=((1.0, 1.0),) * 6,
max_padded_size_ratio=((2.0, 2.0),) * 6,
pad_color=(None,) * 6,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
......@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image,
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: Image shape will be [new_height, new_width, channels].
......@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio=min_padded_size_ratio[index],
max_padded_size_ratio=max_padded_size_ratio[index],
pad_color=pad_color[index],
seed=seed)
seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
return _apply_with_random_selector_tuples(
tuple(t for t in (image, boxes, labels, label_scores) if t is not None),
random_crop_pad_selector,
num_cases=len(min_object_covered))
num_cases=len(min_object_covered),
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID)
def ssd_random_crop_fixed_aspect_ratio(
......@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio(
area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7,
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
......@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio(
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio(
crop_result = ssd_random_crop(
image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed)
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed,
preprocess_vars_cache)
i = 3
new_image, new_boxes, new_labels = crop_result[:i]
new_label_scores = None
......@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio(
new_masks,
new_keypoints,
aspect_ratio=aspect_ratio,
seed=seed)
seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
return result
......@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
random_coef=(0.15,) * 7,
min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0),
seed=None):
seed=None,
preprocess_vars_cache=None):
"""Random crop and pad preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
......@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
......@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
"""
crop_result = ssd_random_crop(
image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed)
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed,
preprocess_vars_cache)
i = 3
new_image, new_boxes, new_labels = crop_result[:i]
new_label_scores = None
......@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
aspect_ratio=aspect_ratio,
min_padded_size_ratio=min_padded_size_ratio,
max_padded_size_ratio=max_padded_size_ratio,
seed=seed)
seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
result = list(result)
if new_label_scores is not None:
......@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False,
return prep_func_arg_map
def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
def preprocess(tensor_dict,
preprocess_options,
func_arg_map=None,
preprocess_vars_cache=None):
"""Preprocess images and bounding boxes.
Various types of preprocessing (to be implemented) based on the
......@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
their values.
func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc.
......@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
return tensor_dict[key] if key is not None else None
args = [get_arg(a) for a in arg_names]
if (preprocess_vars_cache is not None and
'preprocess_vars_cache' in inspect.getargspec(func).args):
params['preprocess_vars_cache'] = preprocess_vars_cache
results = func(*args, **params)
if not isinstance(results, (list, tuple)):
results = (results,)
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Records previous preprocessing operations and allows them to be repeated.
Used with object_detection.core.preprocessor. Passing a PreprocessorCache
into individual data augmentation functions or the general preprocess() function
will store all randomly generated variables in the PreprocessorCache. When
a preprocessor function is called multiple times with the same
PreprocessorCache object, that function will perform the same augmentation
on all calls.
"""
from collections import defaultdict
class PreprocessorCache(object):
  """Dictionary wrapper storing random variables generated during preprocessing.

  Values are stored under a (function_id, key) pair. The function_id must be
  one of the class-level constants collected in _VALID_FNS; any other id is
  rejected with a ValueError.
  """

  # Constant keys representing different preprocessing functions
  ROTATION90 = 'rotation90'
  HORIZONTAL_FLIP = 'horizontal_flip'
  VERTICAL_FLIP = 'vertical_flip'
  PIXEL_VALUE_SCALE = 'pixel_value_scale'
  IMAGE_SCALE = 'image_scale'
  RGB_TO_GRAY = 'rgb_to_gray'
  ADJUST_BRIGHTNESS = 'adjust_brightness'
  ADJUST_CONTRAST = 'adjust_contrast'
  ADJUST_HUE = 'adjust_hue'
  ADJUST_SATURATION = 'adjust_saturation'
  DISTORT_COLOR = 'distort_color'
  STRICT_CROP_IMAGE = 'strict_crop_image'
  CROP_IMAGE = 'crop_image'
  PAD_IMAGE = 'pad_image'
  CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
  RESIZE_METHOD = 'resize_method'
  PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
  BLACK_PATCHES = 'black_patches'
  ADD_BLACK_PATCH = 'add_black_patch'
  SELECTOR = 'selector'
  SELECTOR_TUPLES = 'selector_tuples'
  SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
  SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'

  # 23 permitted function ids
  _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
                IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
                ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
                CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
                PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
                SELECTOR_TUPLES, SSD_CROP_SELECTOR_ID, SSD_CROP_PAD_SELECTOR_ID]

  def __init__(self):
    self._history = defaultdict(dict)

  def clear(self):
    """Resets cache."""
    # This has to remain a defaultdict: get() and update() index
    # self._history by function id without first checking that the id is
    # present, so a plain dict would raise KeyError after a reset.
    self._history = defaultdict(dict)

  def _check_function_id(self, function_id):
    """Raises ValueError unless function_id is one of the permitted ids."""
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))

  def get(self, function_id, key):
    """Gets stored value given a function id and key.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.

    Returns:
      value: the corresponding value, expected to be a tensor or
             nested structure of tensors. None if nothing has been stored
             under (function_id, key).

    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    self._check_function_id(function_id)
    return self._history[function_id].get(key)

  def update(self, function_id, key, value):
    """Adds a value to the dictionary.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
      value: the value to store, expected to be a tensor or nested structure
             of tensors.

    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    self._check_function_id(function_id)
    self._history[function_id][key] = value
......@@ -21,6 +21,7 @@ import six
import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields
if six.PY2:
......@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase):
def expectedLabelsAfterThresholdingWithMissingScore(self):
return tf.constant([2], dtype=tf.float32)
def testRgbToGrayscale(self):
  """Compares preprocessor._rgb_to_grayscale with tf.image.rgb_to_grayscale."""
  test_images = self.createTestImages()
  actual_op = preprocessor._rgb_to_grayscale(test_images)
  reference_op = tf.image.rgb_to_grayscale(test_images)
  with self.test_session() as sess:
    actual, reference = sess.run([actual_op, reference_op])
    self.assertAllEqual(reference, actual)
def testNormalizeImage(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
......@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase):
rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask])
self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten())
def _testPreprocessorCache(self,
                           preprocess_options,
                           test_boxes=False,
                           test_masks=False,
                           test_keypoints=False,
                           num_runs=4):
  """Asserts that preprocessing with a shared cache gives matching outputs.

  Calls preprocessor.preprocess `num_runs` times on the same input tensors
  with one shared PreprocessorCache, evaluates all results in a single
  session run and checks that each run's outputs are close to those of the
  run before it.

  Args:
    preprocess_options: list of (function, kwargs dict) tuples passed to
      preprocessor.preprocess.
    test_boxes: if True, groundtruth boxes and classes are fed in and the
      resulting boxes are compared across runs.
    test_masks: if True, instance masks are fed in and compared across runs.
    test_keypoints: if True, keypoints are fed in and compared across runs.
    num_runs: number of times preprocess is invoked with the shared cache.
  """
  cache = preprocessor_cache.PreprocessorCache()
  images = self.createTestImages()
  boxes = self.createTestBoxes()
  classes = self.createTestLabels()
  masks = self.createTestMasks()
  keypoints = self.createTestKeypoints()
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_instance_masks=test_masks, include_keypoints=test_keypoints)

  # The inputs and the set of compared outputs do not depend on the run, so
  # they are selected once here rather than inside each loop. This also keeps
  # num_outputs defined when num_runs is 0.
  input_fields = fields.InputDataFields
  inputs = {input_fields.image: images}
  compared_keys = [input_fields.image]
  if test_boxes:
    inputs[input_fields.groundtruth_boxes] = boxes
    inputs[input_fields.groundtruth_classes] = classes
    compared_keys.append(input_fields.groundtruth_boxes)
  if test_masks:
    inputs[input_fields.groundtruth_instance_masks] = masks
    compared_keys.append(input_fields.groundtruth_instance_masks)
  if test_keypoints:
    inputs[input_fields.groundtruth_keypoints] = keypoints
    compared_keys.append(input_fields.groundtruth_keypoints)
  num_outputs = len(compared_keys)

  # Each call gets its own dict copy so that no run can see changes another
  # run's preprocess call might make to its input dict.
  out = [
      preprocessor.preprocess(
          dict(inputs), preprocess_options, preprocessor_arg_map, cache)
      for _ in range(num_runs)
  ]

  with self.test_session() as sess:
    # Flattened as [run0 outputs..., run1 outputs..., ...], so entries that
    # are num_outputs apart are the same field from consecutive runs.
    to_run = [run_out[key] for run_out in out for key in compared_keys]
    out_array = sess.run(to_run)
    for i in range(num_outputs, len(out_array)):
      self.assertAllClose(out_array[i], out_array[i - num_outputs])
def testRandomHorizontalFlip(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
images = self.expectedImagesAfterNormalization()
......@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomHorizontalFlipWithCache(self):
  """Runs the shared-cache consistency check on random_horizontal_flip."""
  flip_kwargs = {
      'keypoint_flip_permutation': self.createKeypointFlipPermutation()
  }
  self._testPreprocessorCache(
      [(preprocessor.random_horizontal_flip, flip_kwargs)],
      test_boxes=True,
      test_masks=True,
      test_keypoints=True)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
......@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomVerticalFlipWithCache(self):
  """Runs the shared-cache consistency check on random_vertical_flip."""
  flip_kwargs = {
      'keypoint_flip_permutation': self.createKeypointFlipPermutation()
  }
  self._testPreprocessorCache(
      [(preprocessor.random_vertical_flip, flip_kwargs)],
      test_boxes=True,
      test_masks=True,
      test_keypoints=True)
def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_vertical_flip, {})]
image_height = 3
......@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomRotation90WithCache(self):
  """Runs the shared-cache consistency check on random_rotation90."""
  rotation_step = (preprocessor.random_rotation90, {})
  self._testPreprocessorCache([rotation_step],
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def testRunRandomRotation90WithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_rotation90, {})]
image_height = 3
......@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(values_greater_, values_true_)
self.assertAllClose(values_less_, values_true_)
def testRandomPixelValueScaleWithCache(self):
  """Runs the shared-cache consistency check on random_pixel_value_scale."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  scale_step = (preprocessor.random_pixel_value_scale, {})
  self._testPreprocessorCache([normalize_step, scale_step],
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def testRandomImageScale(self):
preprocess_options = [(preprocessor.random_image_scale, {})]
images_original = self.createTestImages()
......@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertTrue(
images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
def testRandomImageScaleWithCache(self):
  """Runs the shared-cache consistency check on random_image_scale."""
  scale_step = (preprocessor.random_image_scale, {})
  self._testPreprocessorCache([scale_step],
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def testRandomRGBtoGray(self):
preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
images_original = self.createTestImages()
......@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(images_g_diff_, image_zero1_)
self.assertAllClose(images_b_diff_, image_zero1_)
def testRandomRGBtoGrayWithCache(self):
  """Runs the shared-cache consistency check on random_rgb_to_gray."""
  gray_step = (preprocessor.random_rgb_to_gray, {'probability': 0.5})
  self._testPreprocessorCache([gray_step],
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def testRandomAdjustBrightness(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
......@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_bright_shape])
self.assertAllEqual(image_original_shape_, image_bright_shape_)
def testRandomAdjustBrightnessWithCache(self):
  """Runs the shared-cache consistency check on random_adjust_brightness."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  brightness_step = (preprocessor.random_adjust_brightness, {})
  self._testPreprocessorCache([normalize_step, brightness_step],
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def testRandomAdjustContrast(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
......@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_contrast_shape])
self.assertAllEqual(image_original_shape_, image_contrast_shape_)
def testRandomAdjustContrastWithCache(self):
  """Runs the shared-cache consistency check on random_adjust_contrast."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  contrast_step = (preprocessor.random_adjust_contrast, {})
  self._testPreprocessorCache([normalize_step, contrast_step],
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def testRandomAdjustHue(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
......@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_hue_shape])
self.assertAllEqual(image_original_shape_, image_hue_shape_)
def testRandomAdjustHueWithCache(self):
  """Runs the shared-cache consistency check on random_adjust_hue."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  hue_step = (preprocessor.random_adjust_hue, {})
  self._testPreprocessorCache([normalize_step, hue_step],
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def testRandomDistortColor(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
......@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase):
[images_original_shape, images_distorted_color_shape])
self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
def testRandomDistortColorWithCache(self):
  """Runs the shared-cache consistency check on random_distort_color."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  distort_step = (preprocessor.random_distort_color, {})
  self._testPreprocessorCache([normalize_step, distort_step],
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def testRandomJitterBoxes(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
......@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithCache(self):
  """Runs the shared-cache consistency check on a gray/normalize/crop chain."""
  preprocess_options = []
  preprocess_options.append(
      (preprocessor.random_rgb_to_gray, {'probability': 0.5}))
  preprocess_options.append((preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1,
  }))
  preprocess_options.append((preprocessor.random_crop_image, {}))
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def testRandomCropImageGrayscale(self):
preprocessing_options = [(preprocessor.rgb_to_gray, {}),
(preprocessor.normalize_image, {
......@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase):
self.expectedKeypointsAfterThresholding()])
self.assertAllClose(retained_keypoints_, expected_keypoints_)
def testRandomCropToAspectRatioWithCache(self):
  """Runs the shared-cache consistency check on random_crop_to_aspect_ratio."""
  crop_step = (preprocessor.random_crop_to_aspect_ratio, {})
  self._testPreprocessorCache([crop_step],
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def testRunRandomCropToAspectRatioWithMasks(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
......@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten())
def testRandomPadToAspectRatioWithCache(self):
  """Runs the shared-cache consistency check on random_pad_to_aspect_ratio."""
  pad_step = (preprocessor.random_pad_to_aspect_ratio, {})
  self._testPreprocessorCache([pad_step],
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def testRunRandomPadToAspectRatioWithMasks(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
......@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten())
def testRandomPadImageWithCache(self):
  """Runs the shared-cache consistency check on random_pad_image."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1,
  })
  pad_step = (preprocessor.random_pad_image, {})
  self._testPreprocessorCache([normalize_step, pad_step],
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def testRandomPadImage(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
......@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase):
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropPadImageWithCache(self):
  """Runs the shared-cache consistency check on random_crop_pad_image."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1,
  })
  crop_pad_step = (preprocessor.random_crop_pad_image, {})
  self._testPreprocessorCache([normalize_step, crop_pad_step],
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def testRandomCropPadImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
......@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase):
self.assertEqual(images_shape_[1], padded_images_shape_[1])
self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
def testRandomBlackPatchesWithCache(self):
  """Runs the shared-cache consistency check on random_black_patches."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  patches_step = (preprocessor.random_black_patches, {
      'size_to_image_ratio': 0.5
  })
  self._testPreprocessorCache([normalize_step, patches_step],
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def testRandomBlackPatches(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
......@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase):
[images_shape, blacked_images_shape])
self.assertAllEqual(images_shape_, blacked_images_shape_)
def testRandomResizeMethodWithCache(self):
  """Runs the shared-cache consistency check on random_resize_method."""
  normalize_step = (preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })
  resize_step = (preprocessor.random_resize_method, {
      'target_size': (75, 150)
  })
  self._testPreprocessorCache([normalize_step, resize_step],
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def testRandomResizeMethod(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
......@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
out_image, out_masks, _ = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
......@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
out_image, out_masks, _ = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
......@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
......@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image = preprocessor.resize_to_range(
out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
......@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
......@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
......@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
......@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
......@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_min_dimension(
out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
in_image, in_masks, min_dimension=min_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
......@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_min_dimension(
out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
in_image, in_masks, min_dimension=min_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
......@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
def testSSDRandomCropWithCache(self):
  """Runs the shared-cache consistency check on ssd_random_crop."""
  preprocess_options = []
  preprocess_options.append((preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  }))
  preprocess_options.append((preprocessor.ssd_random_crop, {}))
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def testSSDRandomCrop(self):
preprocessing_options = [
(preprocessor.normalize_image, {
......@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatioWithCache(self):
  """Runs the shared-cache check on ssd_random_crop_fixed_aspect_ratio."""
  preprocess_options = []
  preprocess_options.append((preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  }))
  preprocess_options.append(
      (preprocessor.ssd_random_crop_fixed_aspect_ratio, {}))
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def _testSSDRandomCropFixedAspectRatio(self,
include_label_scores,
include_instance_masks,
......
......@@ -57,6 +57,10 @@ class InputDataFields(object):
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores.
groundtruth_weights: groundtruth weight factor for bounding boxes.
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shape: true shapes of images in the resized images, as resized
images can be padded with zeros.
"""
image = 'image'
original_image = 'original_image'
......@@ -79,10 +83,13 @@ class InputDataFields(object):
groundtruth_keypoints = 'groundtruth_keypoints'
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_label_scores = 'groundtruth_label_scores'
groundtruth_weights = 'groundtruth_weights'
num_groundtruth_boxes = 'num_groundtruth_boxes'
true_image_shape = 'true_image_shape'
class DetectionResultFields(object):
"""Naming converntions for storing the output of the detector.
"""Naming conventions for storing the output of the detector.
Attributes:
source_id: source of the original image.
......@@ -162,6 +169,7 @@ class TfExampleFields(object):
object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd
object_segment_area: the area of the segment.
object_weight: a weight factor for the object's bounding box.
instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask.
......@@ -194,6 +202,7 @@ class TfExampleFields(object):
object_depiction = 'image/object/depiction'
object_is_crowd = 'image/object/is_crowd'
object_segment_area = 'image/object/segment/area'
object_weight = 'image/object/weight'
instance_masks = 'image/segmentation/object'
instance_boundaries = 'image/boundaries/object'
instance_classes = 'image/segmentation/object/class'
......
......@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder as bcoder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import standard_fields as fields
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.utils import shape_utils
class TargetAssigner(object):
"""Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=1.0,
unmatched_cls_target=None):
negative_class_weight=1.0, unmatched_cls_target=None):
"""Construct Object Detection Target Assigner.
Args:
......@@ -58,10 +58,8 @@ class TargetAssigner(object):
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
anchors (default: 1.0). The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
......@@ -82,7 +80,6 @@ class TargetAssigner(object):
self._similarity_calc = similarity_calc
self._matcher = matcher
self._box_coder = box_coder
self._positive_class_weight = positive_class_weight
self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32)
......@@ -94,7 +91,7 @@ class TargetAssigner(object):
return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
**params):
groundtruth_weights=None, **params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
......@@ -113,6 +110,9 @@ class TargetAssigner(object):
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors matched to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of
the Matcher.
......@@ -140,14 +140,21 @@ class TargetAssigner(object):
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
unmatched_shape_assert = tf.assert_equal(
tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target),
message='Unmatched class target shape incompatible '
'with groundtruth labels shape!')
labels_and_box_shapes_assert = tf.assert_equal(
tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(),
message='Groundtruth boxes and labels have incompatible shapes!')
unmatched_shape_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
shape_utils.combined_static_and_dynamic_shape(
self._unmatched_cls_target))
labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(
groundtruth_labels)[:1],
shape_utils.combined_static_and_dynamic_shape(
groundtruth_boxes.get())[:1])
if groundtruth_weights is None:
num_gt_boxes = groundtruth_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = groundtruth_boxes.num_boxes()
groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
with tf.control_dependencies(
[unmatched_shape_assert, labels_and_box_shapes_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
......@@ -158,16 +165,16 @@ class TargetAssigner(object):
match)
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match)
cls_weights = self._create_classification_weights(
match, self._positive_class_weight, self._negative_class_weight)
reg_weights = self._create_regression_weights(match, groundtruth_weights)
cls_weights = self._create_classification_weights(match,
groundtruth_weights)
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match
......@@ -198,23 +205,31 @@ class TargetAssigner(object):
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_anchors = box_list_ops.gather(anchors,
matched_anchor_indices)
matched_gt_boxes = box_list_ops.gather(groundtruth_boxes,
matched_gt_indices)
matched_reg_targets = self._box_coder.encode(matched_gt_boxes,
matched_anchors)
matched_gt_boxes = match.gather_based_on_match(
groundtruth_boxes.get(),
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
groundtruth_keypoints = groundtruth_boxes.get_field(
fields.BoxListFields.keypoints)
matched_keypoints = match.gather_based_on_match(
groundtruth_keypoints,
unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
matched_keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
match_results_shape = shape_utils.combined_static_and_dynamic_shape(
match.match_results)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets = tf.tile(
self._default_regression_target(),
tf.stack([tf.size(unmatched_ignored_anchor_indices), 1]))
reg_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_reg_targets, unmatched_ignored_reg_targets])
# TODO: summarize the number of matches on average.
self._default_regression_target(), [match_results_shape[0], 1])
matched_anchors_mask = match.matched_column_indicator()
reg_targets = tf.where(matched_anchors_mask,
matched_reg_targets,
unmatched_ignored_reg_targets)
return reg_targets
def _default_regression_target(self):
......@@ -245,27 +260,16 @@ class TargetAssigner(object):
and groundtruth boxes.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
shape [num_gt_boxes, d_1, d_2, ... d_k].
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
ones = self._unmatched_cls_target.shape.ndims * [1]
unmatched_ignored_cls_targets = tf.tile(
tf.expand_dims(self._unmatched_cls_target, 0),
tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
cls_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_cls_targets, unmatched_ignored_cls_targets])
return cls_targets
def _create_regression_weights(self, match):
return match.gather_based_on_match(
groundtruth_labels,
unmatched_value=self._unmatched_cls_target,
ignored_value=self._unmatched_cls_target)
def _create_regression_weights(self, match, groundtruth_weights):
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
......@@ -275,18 +279,18 @@ class TargetAssigner(object):
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors matched to a particular groundtruth box.
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
regression weights
a float32 tensor with shape [num_anchors] representing regression weights.
"""
reg_weights = tf.cast(match.matched_column_indicator(), tf.float32)
return reg_weights
return match.gather_based_on_match(
groundtruth_weights, ignored_value=0., unmatched_value=0.)
def _create_classification_weights(self,
match,
positive_class_weight=1.0,
negative_class_weight=1.0):
groundtruth_weights):
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
......@@ -299,25 +303,23 @@ class TargetAssigner(object):
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
positive_class_weight: weight to be associated to positive anchors
negative_class_weight: weight to be associated to negative anchors
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors matched to a particular groundtruth box.
Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing
classification weights.
a float32 tensor with shape [num_anchors] representing classification
weights.
"""
matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32)
ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32)
unmatched_indicator = 1.0 - matched_indicator - ignore_indicator
cls_weights = (positive_class_weight * matched_indicator
+ negative_class_weight * unmatched_indicator)
return cls_weights
return match.gather_based_on_match(
groundtruth_weights,
ignored_value=0.,
unmatched_value=self._negative_class_weight)
def get_box_coder(self):
"""Get BoxCoder of this TargetAssigner.
Returns:
BoxCoder: BoxCoder object.
BoxCoder object.
"""
return self._box_coder
......@@ -325,7 +327,6 @@ class TargetAssigner(object):
# TODO: This method pulls in all the implementation dependencies into
# core. Therefore it's best to have this factory method outside of core.
def create_target_assigner(reference, stage=None,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None):
"""Factory function for creating standard target assigners.
......@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None,
Args:
reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
......@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None,
raise ValueError('No valid combination of reference and stage.')
return TargetAssigner(similarity_calc, matcher, box_coder,
positive_class_weight=positive_class_weight,
negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target)
......@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None,
def batch_assign_targets(target_assigner,
anchors_batch,
gt_box_batch,
gt_class_targets_batch):
gt_class_targets_batch,
gt_weights_batch=None):
"""Batched assignment of classification and regression targets.
Args:
......@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner,
each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
gt_weights_batch: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
......@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner,
reg_targets_list = []
reg_weights_list = []
match_list = []
for anchors, gt_boxes, gt_class_targets in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch):
if gt_weights_batch is None:
gt_weights_batch = [None] * len(gt_class_targets_batch)
for anchors, gt_boxes, gt_class_targets, gt_weights in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
(cls_targets, cls_weights, reg_targets,
reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets)
anchors, gt_boxes, gt_class_targets, gt_weights)
cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets)
......
......@@ -17,135 +17,238 @@
import numpy as np
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.utils import test_case
class TargetAssignerTest(tf.test.TestCase):
class TargetAssignerTest(test_case.TestCase):
def test_assign_agnostic(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]], dtype=np.float32)
anchor_stddevs = np.array(3 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes, num_valid_rows=2)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_with_ignored_matches(self):
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_class_agnostic_with_ignored_matches(self):
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched thresholds. This means
# that, like above, the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore the expected
# classification weights are [1, 1, 0].
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.3)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0.0, 0.5, .9, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.3)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0.0, 0.5, .9, 1.0]], dtype=np.float32)
anchor_stddevs = np.array(3 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]], dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_agnostic_with_keypoints(self):
  """Checks class-agnostic target assignment with groundtruth keypoints.

  Builds a TargetAssigner from an IOU similarity, an ArgMaxMatcher whose
  matched and unmatched thresholds are both 0.5, and a KeypointBoxCoder with
  six keypoints. It is run on three anchors and two groundtruth boxes, and
  the classification/regression targets and weights (values and dtypes) are
  compared against hard-coded expectations.
  """

  def graph_fn(anchor_means, groundtruth_box_corners,
               groundtruth_keypoints):
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           unmatched_threshold=0.5)
    box_coder = keypoint_box_coder.KeypointBoxCoder(
        num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
    anchors_boxlist = box_list.BoxList(anchor_means)
    groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
    # Keypoints travel with the groundtruth boxes as an extra BoxList field.
    groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
                                  groundtruth_keypoints)
    result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
    # The fifth element of the result (the match object) is not checked here.
    (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
    return (cls_targets, cls_weights, reg_targets, reg_weights)

  anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
                           [0.5, 0.5, 1.0, 1.0],
                           [0.0, 0.5, .9, 1.0]], dtype=np.float32)
  groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
                                      [0.45, 0.45, 0.95, 0.95]],
                                     dtype=np.float32)
  groundtruth_keypoints = np.array(
      [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
       [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
      dtype=np.float32)
  exp_cls_targets = [[1], [1], [0]]
  exp_cls_weights = [1, 1, 1]
  # Each row holds 4 box codes followed by 2 codes for each of 6 keypoints.
  exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
                      -5],
                     [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
                      -11, -7],
                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
  exp_reg_weights = [1, 1, 0]
  (cls_targets_out, cls_weights_out, reg_targets_out,
   reg_weights_out) = self.execute(graph_fn, [anchor_means,
                                              groundtruth_box_corners,
                                              groundtruth_keypoints])
  self.assertAllClose(cls_targets_out, exp_cls_targets)
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_targets_out, exp_reg_targets)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
  # assertEqual replaces the deprecated assertEquals alias, which newer
  # Python versions no longer provide.
  self.assertEqual(cls_targets_out.dtype, np.float32)
  self.assertEqual(cls_weights_out.dtype, np.float32)
  self.assertEqual(reg_targets_out.dtype, np.float32)
  self.assertEqual(reg_weights_out.dtype, np.float32)
def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
  """Checks keypoint target assignment when one anchor is ignored.

  Same setup as the keypoint test with equal thresholds, except that the
  matcher's unmatched threshold (0.3) is below its matched threshold (0.5),
  which leaves a band of IOU values for which an anchor is ignored.
  """
  # Note: test is very similar to above. The third anchor overlaps the second
  # groundtruth box with an IOU of about 0.41 (intersection 0.2025, union
  # 0.4975), which is between the matched and unmatched thresholds. This
  # means that, like above, the expected classification targets are
  # [1, 1, 0]. Unlike above, the third anchor is ignored and therefore the
  # expected classification weights are [1, 1, 0].
  def graph_fn(anchor_means, groundtruth_box_corners,
               groundtruth_keypoints):
    similarity_calc = region_similarity_calculator.IouSimilarity()
    # unmatched_threshold must be below matched_threshold for any anchor to
    # be ignored; with both at 0.5 this test never exercised the ignored path.
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           unmatched_threshold=0.3)
    box_coder = keypoint_box_coder.KeypointBoxCoder(
        num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
    anchors_boxlist = box_list.BoxList(anchor_means)
    groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
    groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
                                  groundtruth_keypoints)
    result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
    (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
    return (cls_targets, cls_weights, reg_targets, reg_weights)

  anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
                           [0.5, 0.5, 1.0, 1.0],
                           [0.0, 0.5, .9, 1.0]], dtype=np.float32)
  groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
                                      [0.45, 0.45, 0.95, 0.95]],
                                     dtype=np.float32)
  groundtruth_keypoints = np.array(
      [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
       [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
      dtype=np.float32)
  exp_cls_targets = [[1], [1], [0]]
  # The ignored third anchor contributes no classification loss.
  exp_cls_weights = [1, 1, 0]
  exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
                      -5],
                     [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
                      -11, -7],
                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
  exp_reg_weights = [1, 1, 0]
  (cls_targets_out, cls_weights_out, reg_targets_out,
   reg_weights_out) = self.execute(graph_fn, [anchor_means,
                                              groundtruth_box_corners,
                                              groundtruth_keypoints])
  self.assertAllClose(cls_targets_out, exp_cls_targets)
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_targets_out, exp_reg_targets)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
  # assertEqual replaces the deprecated assertEquals alias.
  self.assertEqual(cls_targets_out.dtype, np.float32)
  self.assertEqual(cls_weights_out.dtype, np.float32)
  self.assertEqual(reg_targets_out.dtype, np.float32)
  self.assertEqual(reg_weights_out.dtype, np.float32)
def test_assign_multiclass(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
groundtruth_labels):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]], dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
......@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass_unequal_class_weights(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=0.5,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_weights = [1, 1, .5, 1]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(_, cls_weights, _, _, _) = result
with self.test_session() as sess:
cls_weights_out = sess.run(cls_weights)
self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners,
groundtruth_labels])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_multiclass_with_groundtruth_weights(self):
  """Checks that per-box groundtruth weights show up in both weight outputs.

  Four anchors are assigned against three groundtruth boxes weighted
  0.3, 0. and 0.5; only the classification and regression weights returned
  by TargetAssigner.assign are compared with the expected values.
  """

  def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
               groundtruth_labels, groundtruth_weights):
    iou_similarity = region_similarity_calculator.IouSimilarity()
    argmax = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                          unmatched_threshold=0.5)
    coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    background_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
    assigner = targetassigner.TargetAssigner(
        iou_similarity, argmax, coder,
        unmatched_cls_target=background_target)
    anchors = box_list.BoxList(anchor_means)
    anchors.add_field('stddev', anchor_stddevs)
    groundtruth = box_list.BoxList(groundtruth_box_corners)
    # Only the two weight tensors of the five-element result are returned.
    _, cls_weights, _, reg_weights, _ = assigner.assign(
        anchors, groundtruth, groundtruth_labels, groundtruth_weights)
    return (cls_weights, reg_weights)

  anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
                           [0.5, 0.5, 1.0, 0.8],
                           [0, 0.5, .5, 1.0],
                           [.75, 0, 1.0, .25]], dtype=np.float32)
  anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
  groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
                                      [0.5, 0.5, 0.9, 0.9],
                                      [.75, 0, .95, .27]], dtype=np.float32)
  groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
                                 [0, 0, 0, 0, 0, 1, 0],
                                 [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
  groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)

  # The background (third) anchor gets classification weight 1 ...
  exp_cls_weights = [0.3, 0., 1, 0.5]
  # ... and regression weight 0.
  exp_reg_weights = [0.3, 0., 0., 0.5]

  graph_inputs = [anchor_means, anchor_stddevs, groundtruth_box_corners,
                  groundtruth_labels, groundtruth_weights]
  cls_weights_out, reg_weights_out = self.execute(graph_fn, graph_inputs)
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_assign_multidimensional_class_targets(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], tf.float32)
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
groundtruth_labels):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]], dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], np.float32)
exp_cls_targets = [[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
......@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners,
groundtruth_labels])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_empty_groundtruth(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
boxes = box_list.BoxList(box_corners)
groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
groundtruth_labels):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
groundtruth_labels = np.zeros((0, 3), dtype=np.float32)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]],
dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
exp_cls_targets = [[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
......@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_reg_weights = [0, 0, 0, 0]
exp_matching_anchors = []
result = target_assigner.assign(priors, boxes, groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners,
groundtruth_labels])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
......@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase):
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
with self.test_session() as sess:
with self.assertRaisesWithPredicateMatch(
tf.errors.InvalidArgumentError,
'Groundtruth boxes and labels have incompatible shapes!'):
sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
with self.assertRaisesRegexp(ValueError, 'Unequal shapes'):
target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
......@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase):
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
with self.assertRaises(ValueError):
......@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase):
num_valid_rows=3)
class BatchTargetAssignerTest(tf.test.TestCase):
class BatchTargetAssignerTest(test_case.TestCase):
def _get_agnostic_target_assigner(self):
  """Builds a TargetAssigner that has no unmatched class target.

  The assigner combines IoU similarity, an ArgMaxMatcher whose matched and
  unmatched thresholds are both 0.5, and a MeanStddevBoxCoder. Positive and
  negative class weights are both 1.0.

  Returns:
    A targetassigner.TargetAssigner constructed with
    unmatched_cls_target=None.
  """
  similarity_calc = region_similarity_calculator.IouSimilarity()
  matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                         unmatched_threshold=0.5)
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  return targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=1.0,
      unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes):
  """Builds a TargetAssigner whose unmatched class target is one-hot.

  The assigner combines IoU similarity, an ArgMaxMatcher whose matched and
  unmatched thresholds are both 0.5, and a MeanStddevBoxCoder. Positive and
  negative class weights are both 1.0.

  Args:
    num_classes: number of zeros appended after the leading 1 in the
      unmatched class target, so the target has num_classes + 1 entries.

  Returns:
    A targetassigner.TargetAssigner whose unmatched_cls_target is the float32
    vector [1, 0, ..., 0].
  """
  similarity_calc = region_similarity_calculator.IouSimilarity()
  matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                         unmatched_threshold=0.5)
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
  return targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=1.0,
      unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions):
  """Builds a TargetAssigner with an all-zero, multi-dimensional target.

  The assigner combines IoU similarity, an ArgMaxMatcher whose matched and
  unmatched thresholds are both 0.5, and a MeanStddevBoxCoder. Positive and
  negative class weights are both 1.0.

  Args:
    target_dimensions: shape passed to np.zeros to build the unmatched class
      target (e.g. (2, 3)).

  Returns:
    A targetassigner.TargetAssigner whose unmatched_cls_target is a float32
    tensor of zeros with shape target_dimensions.
  """
  similarity_calc = region_similarity_calculator.IouSimilarity()
  matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                         unmatched_threshold=0.5)
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
                                     tf.float32)
  return targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=1.0,
      unmatched_cls_target=unmatched_cls_target)
def test_batch_assign_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
agnostic_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]],
dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
......@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multiclass_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_boxlist1,
groundtruth_boxlist2])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
class_targets2 = tf.constant([[0, 0, 0, 1],
[0, 0, 1, 0]], tf.float32)
def test_batch_assign_multiclass_targets(self):
  """Checks batch_assign_targets with one-hot multiclass groundtruth labels.

  A batch of two groundtruth box lists (one box, then two boxes) is assigned
  against four shared anchors using the multiclass target assigner built
  with num_classes=3. The four outputs returned through self.execute are
  compared with hand-written expected values.
  """

  def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
               groundtruth_boxlist2, class_targets1, class_targets2):
    # Wrap the raw inputs in BoxLists; the anchors carry a 'stddev' field.
    box_list1 = box_list.BoxList(groundtruth_boxlist1)
    box_list2 = box_list.BoxList(groundtruth_boxlist2)
    gt_box_batch = [box_list1, box_list2]
    gt_class_targets = [class_targets1, class_targets2]
    anchors_boxlist = box_list.BoxList(anchor_means)
    anchors_boxlist.add_field('stddev', anchor_stddevs)
    multiclass_target_assigner = self._get_multi_class_target_assigner(
        num_classes=3)
    # The fifth returned value is not checked by this test.
    (cls_targets, cls_weights, reg_targets, reg_weights,
     _) = targetassigner.batch_assign_targets(
         multiclass_target_assigner, anchors_boxlist, gt_box_batch,
         gt_class_targets)
    return (cls_targets, cls_weights, reg_targets, reg_weights)

  groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
  groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
                                   [0.015789, 0.0985, 0.55789, 0.3842]],
                                  dtype=np.float32)
  class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
  class_targets2 = np.array([[0, 0, 0, 1],
                             [0, 0, 1, 0]], dtype=np.float32)

  anchor_means = np.array([[0, 0, .25, .25],
                           [0, .25, 1, 1],
                           [0, .1, .5, .5],
                           [.75, .75, 1, 1]], dtype=np.float32)
  anchor_stddevs = np.array([[.1, .1, .1, .1],
                             [.1, .1, .1, .1],
                             [.1, .1, .1, .1],
                             [.1, .1, .1, .1]], dtype=np.float32)

  exp_reg_targets = [[[0, 0, -0.5, -0.5],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0]],
                     [[0, 0, 0, 0],
                      [0, 0.01231521, 0, 0],
                      [0.15789001, -0.01500003, 0.57889998, -1.15799987],
                      [0, 0, 0, 0]]]
  exp_cls_weights = [[1, 1, 1, 1],
                     [1, 1, 1, 1]]
  # Rows equal to [1, 0, 0, 0] match the unmatched class target that
  # _get_multi_class_target_assigner builds for num_classes=3.
  exp_cls_targets = [[[0, 1, 0, 0],
                      [1, 0, 0, 0],
                      [1, 0, 0, 0],
                      [1, 0, 0, 0]],
                     [[1, 0, 0, 0],
                      [0, 0, 0, 1],
                      [0, 0, 1, 0],
                      [1, 0, 0, 0]]]
  exp_reg_weights = [[1, 0, 0, 0],
                     [0, 1, 1, 0]]

  (cls_targets_out, cls_weights_out, reg_targets_out,
   reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
                                              groundtruth_boxlist1,
                                              groundtruth_boxlist2,
                                              class_targets1,
                                              class_targets2])
  self.assertAllClose(cls_targets_out, exp_cls_targets)
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_targets_out, exp_reg_targets)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_batch_assign_multiclass_targets_with_padded_groundtruth(self):
def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2, class_targets1, class_targets2,
groundtruth_weights1, groundtruth_weights2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [class_targets1, class_targets2]
gt_weights = [groundtruth_weights1, groundtruth_weights2]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets, gt_weights)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2],
[0., 0., 0., 0.]], dtype=np.float32)
groundtruth_weights1 = np.array([1, 0], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842],
[0, 0, 0, 0]],
dtype=np.float32)
groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32)
class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32)
class_targets2 = np.array([[0, 0, 0, 1],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
......@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_boxlist1,
groundtruth_boxlist2,
class_targets1,
class_targets2,
groundtruth_weights1,
groundtruth_weights2])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_batch_assign_multidimensional_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[[0, 1, 1],
[1, 1, 0]]], tf.float32)
class_targets2 = tf.constant([[[0, 1, 1],
[1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2, class_targets1, class_targets2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [class_targets1, class_targets2]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
target_dimensions=(2, 3))
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]],
dtype=np.float32)
class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
class_targets2 = np.array([[0, 0, 0, 1],
[0, 0, 1, 0]], dtype=np.float32)
class_targets1 = np.array([[[0, 1, 1],
[1, 1, 0]]], dtype=np.float32)
class_targets2 = np.array([[[0, 1, 1],
[1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 0.],
......@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[0., 0., 0.]]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
target_dimensions=(2, 3))
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_boxlist1,
groundtruth_boxlist2,
class_targets1,
class_targets2])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
# Checks batch_assign_targets for a single image that has zero groundtruth
# boxes, matched against two anchors with the multiclass target assigner.
# NOTE(review): this body contains two ways of running the same scenario --
# a session-based path that uses tf tensors and `priors`, and a
# graph_fn/self.execute path that uses numpy inputs. It looks like one of
# them is a leftover; confirm which should remain before restructuring.
def test_batch_assign_empty_groundtruth(self):
# Session-based inputs: an empty [0, 4] box tensor is obtained by slicing
# zero rows out of a [1, 4] zeros tensor.
box_coords_expanded = tf.zeros((1, 4), tf.float32)
box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
box_list1 = box_list.BoxList(box_coords)
gt_box_batch = [box_list1]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
# graph_fn wraps its inputs in BoxLists, runs batch_assign_targets on a
# batch of size one and returns the first four outputs (the fifth is dropped).
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
gt_class_targets):
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
gt_box_batch = [groundtruth_boxlist]
gt_class_targets_batch = [gt_class_targets]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist,
gt_box_batch, gt_class_targets_batch)
return (cls_targets, cls_weights, reg_targets, reg_weights)
# Numpy inputs for the self.execute path: no groundtruth boxes, two anchors.
groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
# Expected outputs: zero regression targets and weights, class weight 1 per
# anchor, and class targets of [1, 0, 0, 0] for both anchors.
exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[0, 0]]
exp_match_0 = []
# Class targets have num_classes + pad columns and zero rows.
num_classes = 3
pad = 1
# Session-based path: also checks that a list with one match object is
# returned and that its matched column indices are empty.
gt_class_targets = tf.zeros((0, num_classes + pad))
gt_class_targets_batch = [gt_class_targets]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors,
gt_box_batch, gt_class_targets_batch)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
# self.execute path: gt_class_targets is rebound to a numpy array here and
# the same four expectations are asserted again on the returned values.
gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32)
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(
graph_fn, [anchor_means, anchor_stddevs, groundtruth_box_corners,
gt_class_targets])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
class CreateTargetAssignerTest(tf.test.TestCase):
......
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"pet_label_map.pbtxt",
])
......@@ -12,9 +12,10 @@ py_library(
srcs = ["tf_example_decoder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow/models/research/object_detection/core:data_decoder",
"//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/utils:label_map_util",
],
)
......@@ -24,6 +25,7 @@ py_test(
deps = [
":tf_example_decoder",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
],
)
......@@ -22,6 +22,7 @@ import tensorflow as tf
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
slim_example_decoder = tf.contrib.slim.tfexample_decoder
......@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder):
def __init__(self,
load_instance_masks=False,
instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
label_map_proto_file=None,
use_display_name=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
load_instance_masks: whether or not to load and handle instance masks.
instance_mask_type: type of instance masks. Options are provided in
input_reader.proto. This is only used if `load_instance_masks` is True.
label_map_proto_file: a file path to a
object_detection.protos.StringIntLabelMap proto. If provided, then the
mapped IDs of 'image/object/class/text' will take precedence over the
......@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
use_display_name: whether or not to use the `display_name` for label
mapping (instead of `name`). Only used if label_map_proto_file is
provided.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
"""
self.keys_to_features = {
'image/encoded':
......@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf.VarLenFeature(tf.int64),
'image/object/group_of':
tf.VarLenFeature(tf.int64),
'image/object/weight':
tf.VarLenFeature(tf.float32),
}
self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image(
......@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')),
fields.InputDataFields.groundtruth_group_of: (
slim_example_decoder.Tensor('image/object/group_of'))
slim_example_decoder.Tensor('image/object/group_of')),
fields.InputDataFields.groundtruth_weights: (
slim_example_decoder.Tensor('image/object/weight')),
}
if load_instance_masks:
self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.float32)
self.items_to_handlers[
fields.InputDataFields.groundtruth_instance_masks] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/mask', 'image/height', 'image/width'],
self._reshape_instance_masks))
# TODO: Add label_handler that decodes from 'image/object/class/text'
# primarily after the recent tf.contrib.slim changes make into a release
# supported by cloudml.
label_handler = slim_example_decoder.Tensor('image/object/class/label')
if instance_mask_type in (input_reader_pb2.DEFAULT,
input_reader_pb2.NUMERICAL_MASKS):
self.keys_to_features['image/object/mask'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_instance_masks] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/mask', 'image/height', 'image/width'],
self._reshape_instance_masks))
elif instance_mask_type == input_reader_pb2.PNG_MASKS:
self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
self.items_to_handlers[
fields.InputDataFields.groundtruth_instance_masks] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/mask', 'image/height', 'image/width'],
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if label_map_proto_file:
label_map = label_map_util.get_label_map_dict(label_map_proto_file,
use_display_name)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(label_map.keys())),
values=tf.constant(list(label_map.values()), dtype=tf.int64)),
default_value=-1)
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
label_handler = slim_example_decoder.BackupHandler(
slim_example_decoder.LookupTensor(
'image/object/class/text', table, default_value=''),
slim_example_decoder.Tensor('image/object/class/label'))
else:
label_handler = slim_example_decoder.Tensor('image/object/class/label')
self.items_to_handlers[
fields.InputDataFields.groundtruth_classes] = label_handler
......@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
shape [None] indicating the weights of groundtruth boxes.
fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
the number of groundtruth_boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing containing object mask area in pixel squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
Optional:
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
[None] indicating if the boxes represent `group_of` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of
fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
shape [None, None, None] containing instance masks.
"""
serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
......@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
def default_groundtruth_weights():
return tf.ones(
[tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
dtype=tf.float32)
tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
tf.greater(
tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
default_groundtruth_weights)
return tensor_dict
def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory.
width].
Args:
keys_to_tensors: a dictionary from keys to tensors.
......@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
masks = tf.sparse_tensor_to_dense(masks)
masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape)
return tf.cast(masks, tf.float32)
def _decode_png_instance_masks(self, keys_to_tensors):
  """Decodes PNG-encoded instance masks into one dense float tensor.

  Every entry of 'image/object/mask' is decoded as a single-channel image and
  binarized (pixels greater than 0 become 1.0). When no masks are present, an
  empty tensor whose trailing dimensions are taken from 'image/height' and
  'image/width' is produced instead.

  Args:
    keys_to_tensors: a dictionary from keys to tensors. The entries
      'image/object/mask', 'image/height' and 'image/width' are read.

  Returns:
    A 3-D float tensor of shape [num_instances, height, width] with values
      in {0, 1}.
  """

  def _decode_single_mask(encoded_png):
    # Decode to [height, width, 1], then drop the channel axis.
    decoded = tf.image.decode_image(encoded_png, channels=1)
    single_mask = tf.squeeze(decoded, axis=2)
    single_mask.set_shape([None, None])
    return tf.to_float(tf.greater(single_mask, 0))

  encoded_masks = keys_to_tensors['image/object/mask']
  image_height = keys_to_tensors['image/height']
  image_width = keys_to_tensors['image/width']
  if isinstance(encoded_masks, tf.SparseTensor):
    encoded_masks = tf.sparse_tensor_to_dense(encoded_masks, default_value='')

  def _decode_all_masks():
    return tf.map_fn(_decode_single_mask, encoded_masks, dtype=tf.float32)

  def _empty_masks():
    # No masks to decode: emit a [0, height, width] tensor instead.
    empty_shape = tf.to_int32(tf.stack([0, image_height, image_width]))
    return tf.zeros(empty_shape)

  has_masks = tf.greater(tf.size(encoded_masks), 0)
  return tf.cond(has_masks, _decode_all_masks, _empty_masks)
......@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
class TfExampleDecoderTest(tf.test.TestCase):
......@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={
......@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
......@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={
......@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodePngInstanceMasks(self):
  """Checks that PNG-encoded instance masks decode back to the input masks."""
  image = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)

  # Two random binary masks, each of shape [10, 10, 1].
  masks = [
      np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
      for _ in range(2)
  ]
  encoded_masks = []
  expected_per_mask = []
  for single_mask in masks:
    encoded_masks.append(self._EncodeImage(single_mask, encoding_type='png'))
    expected_per_mask.append(np.squeeze(single_mask.astype(np.float32)))
  expected_masks = np.stack(expected_per_mask)

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/mask': self._BytesFeature(encoded_masks)
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(
      load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    decoded = sess.run(tensor_dict)

  masks_key = fields.InputDataFields.groundtruth_instance_masks
  self.assertAllEqual(expected_masks, decoded[masks_key])
def testDecodeEmptyPngInstanceMasks(self):
  """Checks that an example without masks decodes to a [0, 10, 10] tensor."""
  image = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)
  no_masks = []

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/mask': self._BytesFeature(no_masks),
      'image/height': self._Int64Feature([10]),
      'image/width': self._Int64Feature([10]),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(
      load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    decoded = sess.run(tensor_dict)

  masks_key = fields.InputDataFields.groundtruth_instance_masks
  self.assertAllEqual(decoded[masks_key].shape, [0, 10, 10])
def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
......@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
def testDecodeDefaultGroundtruthWeights(self):
  """Checks that weights are all ones when the example carries none."""
  image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)
  ymins = [0.0, 4.0]
  xmins = [1.0, 5.0]
  ymaxs = [2.0, 6.0]
  xmaxs = [3.0, 7.0]

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/bbox/ymin': self._FloatFeature(ymins),
      'image/object/bbox/xmin': self._FloatFeature(xmins),
      'image/object/bbox/ymax': self._FloatFeature(ymaxs),
      'image/object/bbox/xmax': self._FloatFeature(xmaxs),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  # The static shape is checked on the graph tensor before running it.
  boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  self.assertAllEqual(boxes.get_shape().as_list(), [None, 4])

  with self.test_session() as sess:
    decoded = sess.run(tensor_dict)

  self.assertAllClose(decoded[fields.InputDataFields.groundtruth_weights],
                      np.ones(2, dtype=np.float32))
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
......@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelNoText(self):
  """Checks that integer labels are decoded when no class text is present."""
  image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)
  expected_classes = [1, 2]

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/class/label': self._Int64Feature(expected_classes),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  label_map_string = (
      "\n"
      "item {\n"
      "id:1\n"
      "name:'cat'\n"
      "}\n"
      "item {\n"
      "id:2\n"
      "name:'dog'\n"
      "}\n"
  )
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
    label_map_file.write(label_map_string)

  decoder = tf_example_decoder.TfExampleDecoder(
      label_map_proto_file=label_map_path)
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  classes_key = fields.InputDataFields.groundtruth_classes
  self.assertAllEqual(tensor_dict[classes_key].get_shape().as_list(), [None])

  # The initializer op is created before the session is opened.
  tables_init = tf.tables_initializer()
  with self.test_session() as sess:
    sess.run(tables_init)
    decoded = sess.run(tensor_dict)

  self.assertAllEqual(expected_classes, decoded[classes_key])
def testDecodeObjectLabelUnrecognizedName(self):
  """Checks that a class name missing from the label map decodes to -1."""
  image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)
  class_names = ['cat', 'cheetah']

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/class/text': self._BytesFeature(class_names),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  label_map_string = (
      "\n"
      "item {\n"
      "id:2\n"
      "name:'cat'\n"
      "}\n"
      "item {\n"
      "id:1\n"
      "name:'dog'\n"
      "}\n"
  )
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
    label_map_file.write(label_map_string)

  decoder = tf_example_decoder.TfExampleDecoder(
      label_map_proto_file=label_map_path)
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  classes_key = fields.InputDataFields.groundtruth_classes
  self.assertAllEqual(tensor_dict[classes_key].get_shape().as_list(), [None])

  with self.test_session() as sess:
    sess.run(tf.tables_initializer())
    decoded = sess.run(tensor_dict)

  # 'cat' maps to id 2; 'cheetah' is absent from the label map.
  self.assertAllEqual([2, -1], decoded[classes_key])
def testDecodeObjectLabelWithMapping(self):
  """Checks that class names are translated to ids through the label map."""
  image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)
  class_names = ['cat', 'dog']

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/class/text': self._BytesFeature(class_names),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  label_map_string = (
      "\n"
      "item {\n"
      "id:3\n"
      "name:'cat'\n"
      "}\n"
      "item {\n"
      "id:1\n"
      "name:'dog'\n"
      "}\n"
  )
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
    label_map_file.write(label_map_string)

  decoder = tf_example_decoder.TfExampleDecoder(
      label_map_proto_file=label_map_path)
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  classes_key = fields.InputDataFields.groundtruth_classes
  self.assertAllEqual(tensor_dict[classes_key].get_shape().as_list(), [None])

  with self.test_session() as sess:
    sess.run(tf.tables_initializer())
    decoded = sess.run(tensor_dict)

  # 'cat' -> 3 and 'dog' -> 1 according to the label map written above.
  self.assertAllEqual([3, 1], decoded[classes_key])
def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={
......@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
......@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
......@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_difficult])
def testDecodeObjectGroupOf(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_group_of = [0, 1]
example = tf.train.Example(features=tf.train.Features(
......@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase):
[bool(item) for item in object_group_of],
tensor_dict[fields.InputDataFields.groundtruth_group_of])
def testDecodeObjectWeight(self):
  """Checks that explicit per-object weights are decoded unchanged."""
  image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image)
  expected_weights = [0.75, 1.0]

  feature = {
      'image/encoded': self._BytesFeature(encoded_jpeg),
      'image/format': self._BytesFeature('jpeg'),
      'image/object/weight': self._FloatFeature(expected_weights),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

  weights_key = fields.InputDataFields.groundtruth_weights
  self.assertAllEqual(tensor_dict[weights_key].get_shape().as_list(), [None])

  with self.test_session() as sess:
    decoded = sess.run(tensor_dict)

  self.assertAllEqual(expected_weights, decoded[weights_key])
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height,
image_tensor = np.random.randint(256, size=(image_height,
image_width,
3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height,
image_tensor = np.random.randint(256, size=(image_height,
image_width,
3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......
# Tensorflow Object Detection API: main runnables.
# Tensorflow Object Detection API: dataset tools.
package(
default_visibility = ["//visibility:public"],
......@@ -8,18 +8,43 @@ licenses(["notice"])
# Apache 2.0
# Binary target built from create_coco_tf_record.py.
py_binary(
name = "create_coco_tf_record",
srcs = [
"create_coco_tf_record.py",
],
deps = [
"//PIL:pil",
"//pycocotools",
"//tensorflow",
"//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/utils:label_map_util",
],
)
# Test target built from create_coco_tf_record_test.py; it depends on the
# :create_coco_tf_record target.
py_test(
name = "create_coco_tf_record_test",
srcs = [
"create_coco_tf_record_test.py",
],
deps = [
":create_coco_tf_record",
"//tensorflow",
],
)
py_binary(
name = "create_kitti_tf_record",
srcs = [
"create_kitti_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//PIL:pil",
"//lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow_models/object_detection/utils:np_box_ops",
"//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/utils:label_map_util",
"//tensorflow/models/research/object_detection/utils:np_box_ops",
],
)
......@@ -40,11 +65,11 @@ py_binary(
"create_pascal_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//PIL:pil",
"//lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/utils:label_map_util",
],
)
......@@ -65,11 +90,11 @@ py_binary(
"create_pet_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//PIL:pil",
"//lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/utils:label_map_util",
],
)
......@@ -78,8 +103,8 @@ py_library(
srcs = ["oid_tfrecord_creation.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/utils:dataset_util",
],
)
......@@ -88,9 +113,9 @@ py_test(
srcs = ["oid_tfrecord_creation_test.py"],
deps = [
":oid_tfrecord_creation",
"//third_party/py/contextlib2",
"//third_party/py/pandas",
"//third_party/py/tensorflow",
"//contextlib2",
"//pandas",
"//tensorflow",
],
)
......@@ -99,9 +124,9 @@ py_binary(
srcs = ["create_oid_tf_record.py"],
deps = [
":oid_tfrecord_creation",
"//third_party/py/contextlib2",
"//third_party/py/pandas",
"//contextlib2",
"//pandas",
"//tensorflow",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow/models/research/object_detection/utils:label_map_util",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import os
import numpy as np
import PIL.Image
from pycocotools import mask
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Command-line flags: whether to emit PNG instance masks, the image directory
# and annotations JSON file for each of the train / val / test-dev splits, and
# the directory the output records are written to.
flags = tf.app.flags
tf.flags.DEFINE_boolean('include_masks', False,
'Whether to include instance segmentations masks '
'(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '',
'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '',
'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '',
'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '',
'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
FLAGS = flags.FLAGS
# Set the TensorFlow logging threshold to INFO; the conversion code in this
# file reports progress through tf.logging.info.
tf.logging.set_verbosity(tf.logging.INFO)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Builds a tf.Example proto from one COCO image and its annotations.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Bounding boxes in the official COCO dataset are [x, y, width, height]
      tuples in absolute coordinates, where x, y is the top-left (0-indexed)
      corner. They are converted here to the format expected by the
      Tensorflow Object Detection API: [ymin, xmin, ymax, xmax] normalized
      relative to the image size.
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded image bytes.
    example: The converted tf.Example.
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    Whatever PIL.Image.open raises when the file contents cannot be opened as
    an image; no explicit JPEG validation is performed here.
  """
  img_height = image['height']
  img_width = image['width']
  file_name = image['file_name']
  source_id = image['id']

  with tf.gfile.GFile(os.path.join(image_dir, file_name), 'rb') as image_file:
    encoded_jpg = image_file.read()
  # The opened image is not used afterwards; presumably this call is kept so
  # that PIL rejects data it cannot read as an image -- TODO confirm.
  PIL.Image.open(io.BytesIO(encoded_jpg))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmins, xmaxs, ymins, ymaxs = [], [], [], []
  crowd_flags = []
  # Category names are looked up for every kept annotation but are not
  # written into the feature dict below.
  category_names = []
  category_ids = []
  areas = []
  encoded_mask_pngs = []
  num_annotations_skipped = 0

  for annotation in annotations_list:
    x, y, box_width, box_height = tuple(annotation['bbox'])
    # Skip boxes that are empty or that extend past the image's right/bottom
    # edge.
    if (box_width <= 0 or box_height <= 0 or
        x + box_width > img_width or y + box_height > img_height):
      num_annotations_skipped += 1
      continue

    xmins.append(float(x) / img_width)
    xmaxs.append(float(x + box_width) / img_width)
    ymins.append(float(y) / img_height)
    ymaxs.append(float(y + box_height) / img_height)
    crowd_flags.append(annotation['iscrowd'])
    category_id = int(annotation['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    areas.append(annotation['area'])

    if not include_masks:
      continue

    run_len_encoding = mask.frPyObjects(annotation['segmentation'],
                                        img_height, img_width)
    instance_mask = mask.decode(run_len_encoding)
    if not annotation['iscrowd']:
      # Collapse the third axis of the decoded mask with a max.
      instance_mask = np.amax(instance_mask, axis=2)
    png_buffer = io.BytesIO()
    PIL.Image.fromarray(instance_mask).save(png_buffer, format='PNG')
    encoded_mask_pngs.append(png_buffer.getvalue())

  feature_dict = {}
  feature_dict['image/height'] = dataset_util.int64_feature(img_height)
  feature_dict['image/width'] = dataset_util.int64_feature(img_width)
  feature_dict['image/filename'] = dataset_util.bytes_feature(
      file_name.encode('utf8'))
  feature_dict['image/source_id'] = dataset_util.bytes_feature(
      str(source_id).encode('utf8'))
  feature_dict['image/key/sha256'] = dataset_util.bytes_feature(
      key.encode('utf8'))
  feature_dict['image/encoded'] = dataset_util.bytes_feature(encoded_jpg)
  feature_dict['image/format'] = dataset_util.bytes_feature(
      'jpeg'.encode('utf8'))
  feature_dict['image/object/bbox/xmin'] = dataset_util.float_list_feature(
      xmins)
  feature_dict['image/object/bbox/xmax'] = dataset_util.float_list_feature(
      xmaxs)
  feature_dict['image/object/bbox/ymin'] = dataset_util.float_list_feature(
      ymins)
  feature_dict['image/object/bbox/ymax'] = dataset_util.float_list_feature(
      ymaxs)
  feature_dict['image/object/class/label'] = dataset_util.int64_list_feature(
      category_ids)
  feature_dict['image/object/is_crowd'] = dataset_util.int64_list_feature(
      crowd_flags)
  feature_dict['image/object/area'] = dataset_util.float_list_feature(areas)
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_pngs))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads COCO annotation json files and converts to tf.Record format.

  Every image listed in the annotations file yields one serialized tf.Example
  in the output file; images without annotations are written with an empty
  annotation list.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)

  images = groundtruth_data['images']
  category_index = label_map_util.create_category_index(
      groundtruth_data['categories'])

  # Group annotations by the id of the image they belong to.
  annotations_index = {}
  if 'annotations' in groundtruth_data:
    tf.logging.info(
        'Found groundtruth annotations. Building annotations index.')
    for annotation in groundtruth_data['annotations']:
      annotations_index.setdefault(annotation['image_id'], []).append(
          annotation)

  # Give images without annotations an empty list so the lookup below
  # never fails.
  missing_annotation_count = 0
  for image in images:
    if image['id'] not in annotations_index:
      missing_annotation_count += 1
      annotations_index[image['id']] = []
  tf.logging.info('%d images are missing annotations.',
                  missing_annotation_count)

  tf.logging.info('writing to output path: %s', output_path)
  num_images = len(images)
  total_num_annotations_skipped = 0
  writer = tf.python_io.TFRecordWriter(output_path)
  try:
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, num_images)
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_index[image['id']], image_dir, category_index,
          include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      writer.write(tf_example.SerializeToString())
  finally:
    # Close the writer even when converting or writing an example fails.
    writer.close()
  tf.logging.info('Finished writing, skipped %d annotations.',
                  total_num_annotations_skipped)
def main(_):
  """Converts the train, val and test-dev COCO splits to TFRecord files.

  All image-directory and annotations-file flags must be non-empty. The output
  directory is created when it does not exist yet, and one record file per
  split is written into it.
  """
  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
  assert FLAGS.test_image_dir, '`test_image_dir` missing.'
  assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
  assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
  assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'

  output_dir = FLAGS.output_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.gfile.MakeDirs(output_dir)

  # (annotations file, image directory, output file name) for each split, in
  # the order they are converted.
  splits = (
      (FLAGS.train_annotations_file, FLAGS.train_image_dir,
       'coco_train.record'),
      (FLAGS.val_annotations_file, FLAGS.val_image_dir,
       'coco_val.record'),
      (FLAGS.testdev_annotations_file, FLAGS.test_image_dir,
       'coco_testdev.record'),
  )
  for annotations_file, image_dir, record_name in splits:
    _create_tf_record_from_coco_annotations(
        annotations_file,
        image_dir,
        os.path.join(output_dir, record_name),
        FLAGS.include_masks)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_coco_tf_record.py."""
import io
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import create_coco_tf_record
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Tests for create_coco_tf_record.create_tf_example."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    self.assertListEqual(list(proto_field), expectation)

  def _write_random_image(self, file_name, height, width):
    """Saves a random RGB image in the test temp dir and returns that dir.

    Args:
      file_name: name of the image file to create.
      height: image height in pixels.
      width: image width in pixels.

    Returns:
      The directory the image was written to.
    """
    # uint8 pixels, so PIL reads the array as genuine 8-bit RGB data rather
    # than reinterpreting the raw bytes of a float array.
    pixels = np.random.randint(256, size=(height, width, 3)).astype(np.uint8)
    tmp_dir = self.get_temp_dir()
    PIL.Image.fromarray(pixels).save(os.path.join(tmp_dir, file_name))
    return tmp_dir

  def test_create_tf_example(self):
    """Checks image metadata and normalized box corners without masks."""
    image_file_name = 'tmp_image.jpg'
    image_dir = self._write_random_image(image_file_name, 256, 256)
    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]

    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
        2: {
            'name': 'cat',
            'id': 2
        },
        3: {
            'name': 'human',
            'id': 3
        }
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])

  def test_create_tf_example_with_instance_masks(self):
    """Checks that polygon segmentations are written as PNG-encoded masks."""
    image_file_name = 'tmp_image.jpg'
    image_dir = self._write_random_image(image_file_name, 8, 8)
    image = {
        'file_name': image_file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]

    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])

    # Decode each stored PNG back into an array to compare against the
    # expected rasterization of the two polygons.
    mask_feature = example.features.feature['image/object/mask']
    pil_masks = [
        np.array(PIL.Image.open(io.BytesIO(encoded_mask)))
        for encoded_mask in mask_feature.bytes_list.value
    ]
    self.assertEqual(len(pil_masks), 1)
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])
if __name__ == '__main__':
tf.test.main()
......@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
# Filter all bounding boxes of this frame that are of a legal class, and
# don't overlap with a dontcare region.
# TODO(talremez) filter out targets that are truncated or heavily occluded.
# TODO filter out targets that are truncated or heavily occluded.
annotation_for_image = filter_annotations(img_anno, classes_to_use)
example = prepare_example(image_path, annotation_for_image, label_map_dict)
......
......@@ -24,7 +24,7 @@ import tensorflow as tf
from object_detection.dataset_tools import create_kitti_tf_record
class DictToTFExampleTest(tf.test.TestCase):
class CreateKittiTFRecordTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment