Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
...@@ -20,7 +20,7 @@ import tensorflow as tf ...@@ -20,7 +20,7 @@ import tensorflow as tf
from object_detection.core import matcher from object_detection.core import matcher
class AnchorMatcherTest(tf.test.TestCase): class MatchTest(tf.test.TestCase):
def test_get_correct_matched_columnIndices(self): def test_get_correct_matched_columnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
...@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase): ...@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase):
self.assertAllEqual(all_indices_sorted, self.assertAllEqual(all_indices_sorted,
np.arange(num_matches, dtype=np.int32)) np.arange(num_matches, dtype=np.int32))
def test_scalar_gather_based_on_match(self):
  """Gathers from a 1-D input: matched rows pull values, others get fills.

  match_results semantics: >= 0 is a matched column index, -1 is unmatched,
  -2 is ignored. Unmatched rows receive unmatched_value (100) and ignored
  rows receive ignored_value (200).
  """
  match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
  input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
  expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
  match = matcher.Match(match_results)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=100.,
                                                ignored_value=200.)
  # assertEquals is a deprecated alias; assertEqual is the supported API.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session():
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match(self):
  """Gathers rows of a 2-D input; unmatched/ignored rows become zero rows."""
  match_results = tf.constant([1, -1, -2])
  input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
                             dtype=tf.float32)
  # Row 0 matches input row 1; rows 1 (-1, unmatched) and 2 (-2, ignored)
  # are filled with the zero vectors supplied below.
  expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
  match = matcher.Match(match_results)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=tf.zeros(4),
                                                ignored_value=tf.zeros(4))
  # assertEquals is a deprecated alias; assertEqual is the supported API.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session():
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
  """Same gather as above, but verifies the matmul-based gather path.

  With use_matmul_gather=True the Match implementation must avoid the
  `Gather` op entirely (e.g. for TPU compatibility), which we check by
  scanning the graph's operations.
  """
  match_results = tf.constant([1, -1, -2])
  input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
                             dtype=tf.float32)
  expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
  match = matcher.Match(match_results, use_matmul_gather=True)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=tf.zeros(4),
                                                ignored_value=tf.zeros(4))
  # assertEquals is a deprecated alias; assertEqual is the supported API.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session() as sess:
    # Bug fix: the original used `op.name is not 'Gather'`, an identity
    # comparison against a string literal whose result depends on string
    # interning. String comparison must use `!=`.
    self.assertTrue(
        all(op.name != 'Gather' for op in sess.graph.get_operations()))
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function). ...@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class. of these integers to semantic labels is to be handled outside of this class.
Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
and `postprocess` should be reentrant.
The `preprocess` method runs `image_resizer_fn` that returns resized_images and
`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
true_image_shapes indicate the slices that contain the image without padding.
This is useful for padding images to be a fixed size for batching.
The `postprocess` method uses the true image shapes to clip predictions that lie
outside of images.
By default, DetectionModels produce bounding box detections; However, we support By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely, a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints. instance masks and keypoints.
...@@ -106,12 +117,12 @@ class DetectionModel(object): ...@@ -106,12 +117,12 @@ class DetectionModel(object):
This function is responsible for any scaling/shifting of input values that This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image. is necessary prior to running the detector on an input image.
It is also responsible for any resizing that might be necessary as images It is also responsible for any resizing, padding that might be necessary
are assumed to arrive in arbitrary sizes. While this function could as images are assumed to arrive in arbitrary sizes. While this function
conceivably be part of the predict method (below), it is often convenient could conceivably be part of the predict method (below), it is often
to keep these separate --- for example, we may want to preprocess on one convenient to keep these separate --- for example, we may want to preprocess
device, place onto a queue, and let another device (e.g., the GPU) handle on one device, place onto a queue, and let another device (e.g., the GPU)
prediction. handle prediction.
A few important notes about the preprocess function: A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor + We assume that this operation does not have any trainable variables nor
...@@ -134,11 +145,15 @@ class DetectionModel(object): ...@@ -134,11 +145,15 @@ class DetectionModel(object):
Returns: Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32 preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images. tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
""" """
pass pass
@abstractmethod @abstractmethod
def predict(self, preprocessed_inputs): def predict(self, preprocessed_inputs, true_image_shapes):
"""Predict prediction tensors from inputs tensor. """Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions. Outputs of this function can be passed to loss or postprocess functions.
...@@ -146,6 +161,10 @@ class DetectionModel(object): ...@@ -146,6 +161,10 @@ class DetectionModel(object):
Args: Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images. representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns: Returns:
prediction_dict: a dictionary holding prediction tensors to be prediction_dict: a dictionary holding prediction tensors to be
...@@ -154,7 +173,7 @@ class DetectionModel(object): ...@@ -154,7 +173,7 @@ class DetectionModel(object):
pass pass
@abstractmethod @abstractmethod
def postprocess(self, prediction_dict, **params): def postprocess(self, prediction_dict, true_image_shapes, **params):
"""Convert predicted output tensors to final detections. """Convert predicted output tensors to final detections.
Outputs adhere to the following conventions: Outputs adhere to the following conventions:
...@@ -172,6 +191,10 @@ class DetectionModel(object): ...@@ -172,6 +191,10 @@ class DetectionModel(object):
Args: Args:
prediction_dict: a dictionary holding prediction tensors. prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of **params: Additional keyword arguments for specific implementations of
DetectionModel. DetectionModel.
...@@ -190,7 +213,7 @@ class DetectionModel(object): ...@@ -190,7 +213,7 @@ class DetectionModel(object):
pass pass
@abstractmethod @abstractmethod
def loss(self, prediction_dict): def loss(self, prediction_dict, true_image_shapes):
"""Compute scalar loss tensors with respect to provided groundtruth. """Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been Calling this function requires that groundtruth tensors have been
...@@ -198,6 +221,10 @@ class DetectionModel(object): ...@@ -198,6 +221,10 @@ class DetectionModel(object):
Args: Args:
prediction_dict: a dictionary holding predicted tensors prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns: Returns:
a dictionary mapping strings (loss names) to scalar tensors representing a dictionary mapping strings (loss names) to scalar tensors representing
...@@ -209,7 +236,8 @@ class DetectionModel(object): ...@@ -209,7 +236,8 @@ class DetectionModel(object):
groundtruth_boxes_list, groundtruth_boxes_list,
groundtruth_classes_list, groundtruth_classes_list,
groundtruth_masks_list=None, groundtruth_masks_list=None,
groundtruth_keypoints_list=None): groundtruth_keypoints_list=None,
groundtruth_weights_list=None):
"""Provide groundtruth tensors. """Provide groundtruth tensors.
Args: Args:
...@@ -230,10 +258,15 @@ class DetectionModel(object): ...@@ -230,10 +258,15 @@ class DetectionModel(object):
shape [num_boxes, num_keypoints, 2] containing keypoints. shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN. missing keypoints should be encoded as NaN.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
""" """
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[ self._groundtruth_lists[
fields.BoxListFields.classes] = groundtruth_classes_list fields.BoxListFields.classes] = groundtruth_classes_list
if groundtruth_weights_list:
self._groundtruth_lists[fields.BoxListFields.
weights] = groundtruth_weights_list
if groundtruth_masks_list: if groundtruth_masks_list:
self._groundtruth_lists[ self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list fields.BoxListFields.masks] = groundtruth_masks_list
......
...@@ -20,6 +20,7 @@ import tensorflow as tf ...@@ -20,6 +20,7 @@ import tensorflow as tf
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import box_list_ops from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def multiclass_non_max_suppression(boxes, def multiclass_non_max_suppression(boxes,
...@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes, ...@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes,
clip_window=None, clip_window=None,
change_coordinate_frame=False, change_coordinate_frame=False,
masks=None, masks=None,
boundaries=None,
additional_fields=None, additional_fields=None,
scope=None): scope=None):
"""Multi-class version of non maximum suppression. """Multi-class version of non maximum suppression.
...@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes, ...@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes,
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class. on whether a separate mask is predicted per class.
boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
tensor containing box boundaries. `q` can be either number of classes or 1
depending on whether a separate boundary is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be suppression, all tensors corresponding to the selected boxes will be
...@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes, ...@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes,
per_class_boxes_list = tf.unstack(boxes, axis=1) per_class_boxes_list = tf.unstack(boxes, axis=1)
if masks is not None: if masks is not None:
per_class_masks_list = tf.unstack(masks, axis=1) per_class_masks_list = tf.unstack(masks, axis=1)
if boundaries is not None:
per_class_boundaries_list = tf.unstack(boundaries, axis=1)
boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
else [0] * num_classes) else [0] * num_classes)
for class_idx, boxes_idx in zip(range(num_classes), boxes_ids): for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
...@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes, ...@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes,
per_class_masks = per_class_masks_list[boxes_idx] per_class_masks = per_class_masks_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.masks, boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
per_class_masks) per_class_masks)
if boundaries is not None:
per_class_boundaries = per_class_boundaries_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
per_class_boundaries)
if additional_fields is not None: if additional_fields is not None:
for key, tensor in additional_fields.items(): for key, tensor in additional_fields.items():
boxlist_and_class_scores.add_field(key, tensor) boxlist_and_class_scores.add_field(key, tensor)
...@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes,
max_size_per_class: maximum number of retained boxes per class. max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class. default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
representing the window to clip boxes to before performing non-max of the form [y_min, x_min, y_max, x_max] representing the window to clip
suppression. boxes to before performing non-max suppression. This argument can also be
a tensor of shape [4] in which case, the same clip window is applied to
all images in the batch. If clip_widow is None, all boxes are used to
perform non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window relative to clip_window (this can only be set to True if a clip_window
is provided) is provided)
...@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes,
if q != 1 and q != num_classes: if q != 1 and q != num_classes:
raise ValueError('third dimension of boxes must be either 1 or equal ' raise ValueError('third dimension of boxes must be either 1 or equal '
'to the third dimension of scores') 'to the third dimension of scores')
if change_coordinate_frame and clip_window is None:
raise ValueError('if change_coordinate_frame is True, then a clip_window'
'must be specified.')
original_masks = masks original_masks = masks
original_additional_fields = additional_fields original_additional_fields = additional_fields
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'): with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
...@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes,
masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0]) masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
masks = tf.zeros(masks_shape) masks = tf.zeros(masks_shape)
if clip_window is None:
clip_window = tf.stack([
tf.reduce_min(boxes[:, :, :, 0]),
tf.reduce_min(boxes[:, :, :, 1]),
tf.reduce_max(boxes[:, :, :, 2]),
tf.reduce_max(boxes[:, :, :, 3])
])
if clip_window.shape.ndims == 1:
clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
if additional_fields is None: if additional_fields is None:
additional_fields = {} additional_fields = {}
...@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32 per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class. or 1 depending on whether a separate mask is predicted per class.
per_image_clip_window - A 1D float32 tensor of the form
[ymin, xmin, ymax, xmax] representing the window to clip the boxes
to.
per_image_additional_fields - (optional) A variable number of float32 per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...]. tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
...@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_boxes = args[0] per_image_boxes = args[0]
per_image_scores = args[1] per_image_scores = args[1]
per_image_masks = args[2] per_image_masks = args[2]
per_image_clip_window = args[3]
per_image_additional_fields = { per_image_additional_fields = {
key: value key: value
for key, value in zip(additional_fields, args[3:-1]) for key, value in zip(additional_fields, args[4:-1])
} }
per_image_num_valid_boxes = args[-1] per_image_num_valid_boxes = args[-1]
per_image_boxes = tf.reshape( per_image_boxes = tf.reshape(
...@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh, iou_thresh,
max_size_per_class, max_size_per_class,
max_total_size, max_total_size,
clip_window=clip_window, clip_window=per_image_clip_window,
change_coordinate_frame=change_coordinate_frame, change_coordinate_frame=change_coordinate_frame,
masks=per_image_masks, masks=per_image_masks,
additional_fields=per_image_additional_fields) additional_fields=per_image_additional_fields)
...@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes,
num_additional_fields = len(additional_fields) num_additional_fields = len(additional_fields)
num_nmsed_outputs = 4 + num_additional_fields num_nmsed_outputs = 4 + num_additional_fields
batch_outputs = tf.map_fn( batch_outputs = shape_utils.static_or_dynamic_map_fn(
_single_image_nms_fn, _single_image_nms_fn,
elems=([boxes, scores, masks] + list(additional_fields.values()) + elems=([boxes, scores, masks, clip_window] +
[num_valid_boxes]), list(additional_fields.values()) + [num_valid_boxes]),
dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]), dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
parallel_iterations=parallel_iterations) parallel_iterations=parallel_iterations)
......
...@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(nmsed_classes, exp_nms_classes) self.assertAllClose(nmsed_classes, exp_nms_classes)
self.assertAllClose(num_detections, [2, 3]) self.assertAllClose(num_detections, [2, 3])
def test_batch_multiclass_nms_with_per_batch_clip_window(self):
  """Batched NMS with one rank-1 clip window shared across the batch.

  clip_window is a [4] tensor ([ymin, xmin, ymax, xmax]); the implementation
  is expected to broadcast it to every image in the batch. Boxes falling
  entirely outside [0, 0, 200, 200] (e.g. the x ~ 1000 boxes in image 2)
  must be clipped away and not appear among the detections.
  """
  # [batch=2, num_anchors=4, q=2 classes, 4 coords]
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  # [batch=2, num_anchors=4, num_classes=2] per-class scores.
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  # Single window applied to all images in the batch.
  clip_window = tf.constant([0., 0., 200., 200.])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  # Rows beyond num_detections are zero-padded.
  exp_nms_corners = np.array([[[0, 10, 1, 11],
                               [0, 0, 1, 1],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]],
                              [[0, 10.1, 1, 11.1],
                               [0, 100, 1, 101],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]]])
  exp_nms_scores = np.array([[.95, .9, 0, 0],
                             [.5, .3, 0, 0]])
  exp_nms_classes = np.array([[0, 0, 0, 0],
                              [0, 0, 0, 0]])
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections
  ) = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      clip_window=clip_window)
  # No masks / additional fields were passed in, so none should come out.
  self.assertIsNone(nmsed_masks)
  self.assertIsNone(nmsed_additional_fields)
  # Check static shapes
  self.assertAllEqual(nmsed_boxes.shape.as_list(),
                      exp_nms_corners.shape)
  self.assertAllEqual(nmsed_scores.shape.as_list(),
                      exp_nms_scores.shape)
  self.assertAllEqual(nmsed_classes.shape.as_list(),
                      exp_nms_classes.shape)
  self.assertEqual(num_detections.shape.as_list(), [2])
  with self.test_session() as sess:
    (nmsed_boxes, nmsed_scores, nmsed_classes,
     num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
                                 num_detections])
    self.assertAllClose(nmsed_boxes, exp_nms_corners)
    self.assertAllClose(nmsed_scores, exp_nms_scores)
    self.assertAllClose(nmsed_classes, exp_nms_classes)
    self.assertAllClose(num_detections, [2, 2])
def test_batch_multiclass_nms_with_per_image_clip_window(self):
  """Batched NMS with a distinct [batch_size, 4] clip window per image.

  Image 1 uses the tight window [0, 0, 5, 5] so only its unit box near the
  origin survives; image 2 uses [0, 0, 200, 200] and keeps two boxes. This
  exercises the per-image clip_window branch as opposed to the broadcast
  rank-1 case.
  """
  # [batch=2, num_anchors=4, q=2 classes, 4 coords] — same boxes as the
  # per-batch clip window test; only clip_window and expectations differ.
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  # One [ymin, xmin, ymax, xmax] window per image.
  clip_window = tf.constant([[0., 0., 5., 5.],
                             [0., 0., 200., 200.]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  # Rows beyond num_detections are zero-padded.
  exp_nms_corners = np.array([[[0, 0, 1, 1],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]],
                              [[0, 10.1, 1, 11.1],
                               [0, 100, 1, 101],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]]])
  exp_nms_scores = np.array([[.9, 0., 0., 0.],
                             [.5, .3, 0, 0]])
  exp_nms_classes = np.array([[0, 0, 0, 0],
                              [0, 0, 0, 0]])
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections
  ) = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      clip_window=clip_window)
  # No masks / additional fields were passed in, so none should come out.
  self.assertIsNone(nmsed_masks)
  self.assertIsNone(nmsed_additional_fields)
  # Check static shapes
  self.assertAllEqual(nmsed_boxes.shape.as_list(),
                      exp_nms_corners.shape)
  self.assertAllEqual(nmsed_scores.shape.as_list(),
                      exp_nms_scores.shape)
  self.assertAllEqual(nmsed_classes.shape.as_list(),
                      exp_nms_classes.shape)
  self.assertEqual(num_detections.shape.as_list(), [2])
  with self.test_session() as sess:
    (nmsed_boxes, nmsed_scores, nmsed_classes,
     num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
                                 num_detections])
    self.assertAllClose(nmsed_boxes, exp_nms_corners)
    self.assertAllClose(nmsed_scores, exp_nms_scores)
    self.assertAllClose(nmsed_classes, exp_nms_classes)
    # Image 1 keeps one box (tight window), image 2 keeps two.
    self.assertAllClose(num_detections, [1, 2])
def test_batch_multiclass_nms_with_masks(self): def test_batch_multiclass_nms_with_masks(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
......
...@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax]. ...@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1] their coordinate values range in [0, 1]
To preprocess multiple images with the same operations in cases where
nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
object can be passed into the preprocess function or individual operations.
All nondeterministic operations except random_jitter_boxes support caching.
E.g.
Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
Let preprocess_options contain nondeterministic operation(s) excluding
random_jitter_boxes.
cache1 = preprocessor_cache.PreprocessorCache()
cache2 = preprocessor_cache.PreprocessorCache()
a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
e = preprocess(tensor_dict5, preprocess_options)
Then corresponding tensors of object pairs (a,b) and (c,d)
are guaranteed to be equal element-wise, but the equality of any other object
pair cannot be determined.
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then preprocess function we squeeze the image to become a rank 3 tensor and then
...@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image ...@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4. back to rank 4.
""" """
import functools
import inspect
import sys import sys
import tensorflow as tf import tensorflow as tf
...@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops ...@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import box_list_ops from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops from object_detection.core import keypoint_ops
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def _apply_with_random_selector(x, func, num_cases): def _apply_with_random_selector(x,
func,
num_cases,
preprocess_vars_cache=None,
key=''):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1]. """Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args: Args:
x: input Tensor. x: input Tensor.
func: Python function to apply. func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from. num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns: Returns:
The result of func(x, sel), where func receives the value of the The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically. selector as a python integer, but sel is sampled dynamically.
""" """
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) generator_func = functools.partial(
tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
rand_sel = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.SELECTOR,
preprocess_vars_cache, key)
# Pass the real x only to one of the func calls. # Pass the real x only to one of the func calls.
return control_flow_ops.merge([func( return control_flow_ops.merge([func(
control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case) control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
for case in range(num_cases)])[0] for case in range(num_cases)])[0]
def _apply_with_random_selector_tuples(x, func, num_cases): def _apply_with_random_selector_tuples(x,
func,
num_cases,
preprocess_vars_cache=None,
key=''):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1]. """Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args: Args:
x: A tuple of input tensors. x: A tuple of input tensors.
func: Python function to apply. func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from. num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns: Returns:
The result of func(x, sel), where func receives the value of the The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically. selector as a python integer, but sel is sampled dynamically.
""" """
num_inputs = len(x) num_inputs = len(x)
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) generator_func = functools.partial(
# Pass the real x only to one of the func calls. tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
rand_sel = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES,
preprocess_vars_cache, key)
# Pass the real x only to one of the func calls.
tuples = [list() for t in x] tuples = [list() for t in x]
for case in range(num_cases): for case in range(num_cases):
new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x] new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
...@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases): ...@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases):
return tuple(tuples) return tuple(tuples)
def _get_or_create_preprocess_rand_vars(generator_func,
function_id,
preprocess_vars_cache,
key=''):
"""Returns a tensor stored in preprocess_vars_cache or using generator_func.
If the tensor was previously generated and appears in the PreprocessorCache,
the previously generated tensor will be returned. Otherwise, a new tensor
is generated using generator_func and stored in the cache.
Args:
generator_func: A 0-argument function that generates a tensor.
function_id: identifier for the preprocessing function used.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: identifier for the variable stored.
Returns:
The generated tensor.
"""
if preprocess_vars_cache is not None:
var = preprocess_vars_cache.get(function_id, key)
if var is None:
var = generator_func()
preprocess_vars_cache.update(function_id, key, var)
else:
var = generator_func()
return var
def _random_integer(minval, maxval, seed): def _random_integer(minval, maxval, seed):
"""Returns a random 0-D tensor between minval and maxval. """Returns a random 0-D tensor between minval and maxval.
...@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed): ...@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed):
[], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed) [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
# TODO: This method is needed because the current
# tf.image.rgb_to_grayscale method does not support quantization. Replace with
# tf.image.rgb_to_grayscale after quantization support is added.
def _rgb_to_grayscale(images, name=None):
  """Converts one or more images from RGB to Grayscale.

  Outputs a tensor of the same `DType` and rank as `images`. The size of the
  last dimension of the output is 1, containing the Grayscale value of the
  pixels.

  Args:
    images: The RGB tensor to convert. Last dimension must have size 3 and
      should contain RGB values.
    name: A name for the operation (optional).

  Returns:
    The converted grayscale image(s).
  """
  with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name:
    images = tf.convert_to_tensor(images, name='images')
    # Remember original dtype so we can convert back after computing the
    # luma in float32.
    orig_dtype = images.dtype
    flt_image = tf.image.convert_image_dtype(images, tf.float32)

    # Reference for converting between RGB and grayscale.
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    rgb_weights = [0.2989, 0.5870, 0.1140]
    # Reduce over the last axis (channels); computed dynamically as rank-1 so
    # this works for both single images and batches.
    rank_1 = tf.expand_dims(tf.rank(images) - 1, 0)
    gray_float = tf.reduce_sum(
        flt_image * rgb_weights, rank_1, keepdims=True)
    # Restore the static shape with the channel dimension collapsed to 1.
    gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
    return tf.image.convert_image_dtype(gray_float, orig_dtype, name=name)
def normalize_image(image, original_minval, original_maxval, target_minval, def normalize_image(image, original_minval, original_maxval, target_minval,
target_maxval): target_maxval):
"""Normalizes pixel values in the image. """Normalizes pixel values in the image.
...@@ -312,7 +435,8 @@ def random_horizontal_flip(image, ...@@ -312,7 +435,8 @@ def random_horizontal_flip(image,
masks=None, masks=None,
keypoints=None, keypoints=None,
keypoint_flip_permutation=None, keypoint_flip_permutation=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections horizontally. """Randomly flips the image and detections horizontally.
The probability of flipping the image is 50%. The probability of flipping the image is 50%.
...@@ -333,6 +457,10 @@ def random_horizontal_flip(image, ...@@ -333,6 +457,10 @@ def random_horizontal_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. permutation.
seed: random seed seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -364,7 +492,12 @@ def random_horizontal_flip(image, ...@@ -364,7 +492,12 @@ def random_horizontal_flip(image,
with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]): with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
result = [] result = []
# random variable defining whether to do flip or not # random variable defining whether to do flip or not
do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP,
preprocess_vars_cache)
do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
# flip image # flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
...@@ -399,7 +532,8 @@ def random_vertical_flip(image, ...@@ -399,7 +532,8 @@ def random_vertical_flip(image,
masks=None, masks=None,
keypoints=None, keypoints=None,
keypoint_flip_permutation=None, keypoint_flip_permutation=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections vertically. """Randomly flips the image and detections vertically.
The probability of flipping the image is 50%. The probability of flipping the image is 50%.
...@@ -420,6 +554,10 @@ def random_vertical_flip(image, ...@@ -420,6 +554,10 @@ def random_vertical_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. permutation.
seed: random seed seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -451,7 +589,11 @@ def random_vertical_flip(image, ...@@ -451,7 +589,11 @@ def random_vertical_flip(image,
with tf.name_scope('RandomVerticalFlip', values=[image, boxes]): with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
result = [] result = []
# random variable defining whether to do flip or not # random variable defining whether to do flip or not
do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP,
preprocess_vars_cache)
do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
# flip image # flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
...@@ -485,7 +627,8 @@ def random_rotation90(image, ...@@ -485,7 +627,8 @@ def random_rotation90(image,
boxes=None, boxes=None,
masks=None, masks=None,
keypoints=None, keypoints=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly rotates the image and detections 90 degrees counter-clockwise. """Randomly rotates the image and detections 90 degrees counter-clockwise.
The probability of rotating the image is 50%. This can be combined with The probability of rotating the image is 50%. This can be combined with
...@@ -507,6 +650,10 @@ def random_rotation90(image, ...@@ -507,6 +650,10 @@ def random_rotation90(image,
[num_instances, num_keypoints, 2]. The keypoints are in y-x [num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates. normalized coordinates.
seed: random seed seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -532,7 +679,11 @@ def random_rotation90(image, ...@@ -532,7 +679,11 @@ def random_rotation90(image,
result = [] result = []
# random variable defining whether to rotate by 90 degrees or not # random variable defining whether to rotate by 90 degrees or not
do_a_rot90_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_rot90_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
preprocess_vars_cache)
do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
# flip image # flip image
image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image), image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
...@@ -562,7 +713,11 @@ def random_rotation90(image, ...@@ -562,7 +713,11 @@ def random_rotation90(image,
return tuple(result) return tuple(result)
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None): def random_pixel_value_scale(image,
minval=0.9,
maxval=1.1,
seed=None,
preprocess_vars_cache=None):
"""Scales each value in the pixels of the image. """Scales each value in the pixels of the image.
This function scales each pixel independent of the other ones. This function scales each pixel independent of the other ones.
...@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None): ...@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
minval: lower ratio of scaling pixel values. minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values. maxval: upper ratio of scaling pixel values.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomPixelValueScale', values=[image]): with tf.name_scope('RandomPixelValueScale', values=[image]):
color_coef = tf.random_uniform( generator_func = functools.partial(
tf.shape(image), tf.random_uniform, tf.shape(image),
minval=minval, minval=minval, maxval=maxval,
maxval=maxval, dtype=tf.float32, seed=seed)
dtype=tf.float32, color_coef = _get_or_create_preprocess_rand_vars(
seed=seed) generator_func,
preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE,
preprocess_vars_cache)
image = tf.multiply(image, color_coef) image = tf.multiply(image, color_coef)
image = tf.clip_by_value(image, 0.0, 1.0) image = tf.clip_by_value(image, 0.0, 1.0)
...@@ -596,7 +758,8 @@ def random_image_scale(image, ...@@ -596,7 +758,8 @@ def random_image_scale(image,
masks=None, masks=None,
min_scale_ratio=0.5, min_scale_ratio=0.5,
max_scale_ratio=2.0, max_scale_ratio=2.0,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Scales the image size. """Scales the image size.
Args: Args:
...@@ -607,6 +770,10 @@ def random_image_scale(image, ...@@ -607,6 +770,10 @@ def random_image_scale(image,
min_scale_ratio: minimum scaling ratio. min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio. max_scale_ratio: maximum scaling ratio.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -618,10 +785,14 @@ def random_image_scale(image, ...@@ -618,10 +785,14 @@ def random_image_scale(image,
image_shape = tf.shape(image) image_shape = tf.shape(image)
image_height = image_shape[0] image_height = image_shape[0]
image_width = image_shape[1] image_width = image_shape[1]
size_coef = tf.random_uniform([], generator_func = functools.partial(
minval=min_scale_ratio, tf.random_uniform, [],
maxval=max_scale_ratio, minval=min_scale_ratio, maxval=max_scale_ratio,
dtype=tf.float32, seed=seed) dtype=tf.float32, seed=seed)
size_coef = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE,
preprocess_vars_cache)
image_newysize = tf.to_int32( image_newysize = tf.to_int32(
tf.multiply(tf.to_float(image_height), size_coef)) tf.multiply(tf.to_float(image_height), size_coef))
image_newxsize = tf.to_int32( image_newxsize = tf.to_int32(
...@@ -636,7 +807,10 @@ def random_image_scale(image, ...@@ -636,7 +807,10 @@ def random_image_scale(image,
return tuple(result) return tuple(result)
def random_rgb_to_gray(image, probability=0.1, seed=None): def random_rgb_to_gray(image,
probability=0.1,
seed=None,
preprocess_vars_cache=None):
"""Changes the image from RGB to Grayscale with the given probability. """Changes the image from RGB to Grayscale with the given probability.
Args: Args:
...@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None): ...@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
probability: the probability of returning a grayscale image. probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1]. The probability should be a number between [0, 1].
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
def _image_to_gray(image): def _image_to_gray(image):
image_gray1 = tf.image.rgb_to_grayscale(image) image_gray1 = _rgb_to_grayscale(image)
image_gray3 = tf.image.grayscale_to_rgb(image_gray1) image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
return image_gray3 return image_gray3
with tf.name_scope('RandomRGBtoGray', values=[image]): with tf.name_scope('RandomRGBtoGray', values=[image]):
# random variable defining whether to do flip or not # random variable defining whether to change to grayscale or not
do_gray_random = tf.random_uniform([], seed=seed) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_gray_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY,
preprocess_vars_cache)
image = tf.cond( image = tf.cond(
tf.greater(do_gray_random, probability), lambda: image, tf.greater(do_gray_random, probability), lambda: image,
...@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None): ...@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
return image return image
def random_adjust_brightness(image, max_delta=0.2): def random_adjust_brightness(image,
max_delta=0.2,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts brightness. """Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2): ...@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels] image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1]. with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1). max_delta: how much to change the brightness. A value between [0, 1).
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
boxes: boxes which is the same shape as input boxes. boxes: boxes which is the same shape as input boxes.
""" """
with tf.name_scope('RandomAdjustBrightness', values=[image]): with tf.name_scope('RandomAdjustBrightness', values=[image]):
image = tf.image.random_brightness(image, max_delta) generator_func = functools.partial(tf.random_uniform, [],
-max_delta, max_delta, seed=seed)
delta = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS,
preprocess_vars_cache)
image = tf.image.adjust_brightness(image, delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25): def random_adjust_contrast(image,
min_delta=0.8,
max_delta=1.25,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts contrast. """Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25): ...@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the contrast. Contrast will change with a max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image. multiplied to the current contrast of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomAdjustContrast', values=[image]): with tf.name_scope('RandomAdjustContrast', values=[image]):
image = tf.image.random_contrast(image, min_delta, max_delta) generator_func = functools.partial(tf.random_uniform, [],
min_delta, max_delta, seed=seed)
contrast_factor = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST,
preprocess_vars_cache)
image = tf.image.adjust_contrast(image, contrast_factor)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_adjust_hue(image, max_delta=0.02): def random_adjust_hue(image,
max_delta=0.02,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts hue. """Randomly adjusts hue.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02): ...@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels] image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1]. with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between 0 and max_delta. max_delta: change hue randomly with a value between 0 and max_delta.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomAdjustHue', values=[image]): with tf.name_scope('RandomAdjustHue', values=[image]):
image = tf.image.random_hue(image, max_delta) generator_func = functools.partial(tf.random_uniform, [],
-max_delta, max_delta, seed=seed)
delta = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE,
preprocess_vars_cache)
image = tf.image.adjust_hue(image, delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25): def random_adjust_saturation(image,
min_delta=0.8,
max_delta=1.25,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts saturation. """Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25): ...@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the saturation. Saturation will change with a max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image. multiplied to the current saturation of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomAdjustSaturation', values=[image]): with tf.name_scope('RandomAdjustSaturation', values=[image]):
image = tf.image.random_saturation(image, min_delta, max_delta) generator_func = functools.partial(tf.random_uniform, [],
min_delta, max_delta, seed=seed)
saturation_factor = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_SATURATION,
preprocess_vars_cache)
image = tf.image.adjust_saturation(image, saturation_factor)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_distort_color(image, color_ordering=0): def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
"""Randomly distorts color. """Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast Randomly distorts color using a combination of brightness, hue, contrast
...@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0): ...@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels] image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1]. with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1). color_ordering: Python int, a type of distortion (valid values: 0, 1).
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0): ...@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0):
""" """
with tf.name_scope('RandomDistortColor', values=[image]): with tf.name_scope('RandomDistortColor', values=[image]):
if color_ordering == 0: if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.) image = random_adjust_brightness(
image = tf.image.random_saturation(image, lower=0.5, upper=1.5) image, max_delta=32. / 255.,
image = tf.image.random_hue(image, max_delta=0.2) preprocess_vars_cache=preprocess_vars_cache)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5) image = random_adjust_saturation(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_hue(
image, max_delta=0.2,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_contrast(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
elif color_ordering == 1: elif color_ordering == 1:
image = tf.image.random_brightness(image, max_delta=32. / 255.) image = random_adjust_brightness(
image = tf.image.random_contrast(image, lower=0.5, upper=1.5) image, max_delta=32. / 255.,
image = tf.image.random_saturation(image, lower=0.5, upper=1.5) preprocess_vars_cache=preprocess_vars_cache)
image = tf.image.random_hue(image, max_delta=0.2) image = random_adjust_contrast(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_saturation(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_hue(
image, max_delta=0.2,
preprocess_vars_cache=preprocess_vars_cache)
else: else:
raise ValueError('color_ordering must be in {0, 1}') raise ValueError('color_ordering must be in {0, 1}')
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image return image
...@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image, ...@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image,
min_object_covered=1.0, min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33), aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0), area_range=(0.1, 1.0),
overlap_thresh=0.3): overlap_thresh=0.3,
preprocess_vars_cache=None):
"""Performs random crop. """Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are Note: boxes will be clipped to the crop. Keypoint coordinates that are
...@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image, ...@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image,
original image. original image.
overlap_thresh: minimum overlap thresh with new cropped overlap_thresh: minimum overlap thresh with new cropped
image to keep the box. image to keep the box.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image, ...@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image,
tf.clip_by_value( tf.clip_by_value(
boxes, clip_value_min=0.0, clip_value_max=1.0), 1) boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( generator_func = functools.partial(
tf.image.sample_distorted_bounding_box,
image_shape, image_shape,
bounding_boxes=boxes_expanded, bounding_boxes=boxes_expanded,
min_object_covered=min_object_covered, min_object_covered=min_object_covered,
...@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image, ...@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image,
max_attempts=100, max_attempts=100,
use_image_if_no_bounding_boxes=True) use_image_if_no_bounding_boxes=True)
# for ssd cropping, each value of min_object_covered has its own
# cached random variable
sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE,
preprocess_vars_cache, key=min_object_covered)
im_box_begin, im_box_size, im_box = sample_distorted_bounding_box im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
new_image = tf.slice(image, im_box_begin, im_box_size) new_image = tf.slice(image, im_box_begin, im_box_size)
...@@ -984,7 +1254,8 @@ def random_crop_image(image, ...@@ -984,7 +1254,8 @@ def random_crop_image(image,
area_range=(0.1, 1.0), area_range=(0.1, 1.0),
overlap_thresh=0.3, overlap_thresh=0.3,
random_coef=0.0, random_coef=0.0,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly crops the image. """Randomly crops the image.
Given the input image and its bounding boxes, this op randomly Given the input image and its bounding boxes, this op randomly
...@@ -1029,6 +1300,10 @@ def random_crop_image(image, ...@@ -1029,6 +1300,10 @@ def random_crop_image(image,
cropped image, and if it is 1.0, we will always get the cropped image, and if it is 1.0, we will always get the
original image. original image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: Image shape will be [new_height, new_width, channels]. image: Image shape will be [new_height, new_width, channels].
...@@ -1056,13 +1331,17 @@ def random_crop_image(image, ...@@ -1056,13 +1331,17 @@ def random_crop_image(image,
min_object_covered=min_object_covered, min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range, aspect_ratio_range=aspect_ratio_range,
area_range=area_range, area_range=area_range,
overlap_thresh=overlap_thresh) overlap_thresh=overlap_thresh,
preprocess_vars_cache=preprocess_vars_cache)
# avoids tf.cond to make faster RCNN training on borg. See b/140057645. # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
if random_coef < sys.float_info.min: if random_coef < sys.float_info.min:
result = strict_random_crop_image_fn() result = strict_random_crop_image_fn()
else: else:
do_a_crop_random = tf.random_uniform([], seed=seed) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_crop_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE,
preprocess_vars_cache)
do_a_crop_random = tf.greater(do_a_crop_random, random_coef) do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
outputs = [image, boxes, labels] outputs = [image, boxes, labels]
...@@ -1084,7 +1363,8 @@ def random_pad_image(image, ...@@ -1084,7 +1363,8 @@ def random_pad_image(image,
min_image_size=None, min_image_size=None,
max_image_size=None, max_image_size=None,
pad_color=None, pad_color=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly pads the image. """Randomly pads the image.
This function randomly pads the image with zeros. The final size of the This function randomly pads the image with zeros. The final size of the
...@@ -1110,8 +1390,11 @@ def random_pad_image(image, ...@@ -1110,8 +1390,11 @@ def random_pad_image(image,
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input if set as None, it will be set to average color of the input
image. image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: Image shape will be [new_height, new_width, channels]. image: Image shape will be [new_height, new_width, channels].
...@@ -1155,6 +1438,12 @@ def random_pad_image(image, ...@@ -1155,6 +1438,12 @@ def random_pad_image(image,
lambda: _random_integer(0, target_width - image_width, seed), lambda: _random_integer(0, target_width - image_width, seed),
lambda: tf.constant(0, dtype=tf.int32)) lambda: tf.constant(0, dtype=tf.int32))
gen_func = lambda: (target_height, target_width, offset_height, offset_width)
params = _get_or_create_preprocess_rand_vars(
gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
preprocess_vars_cache)
target_height, target_width, offset_height, offset_width = params
new_image = tf.image.pad_to_bounding_box( new_image = tf.image.pad_to_bounding_box(
image, image,
offset_height=offset_height, offset_height=offset_height,
...@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image, ...@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image,
min_padded_size_ratio=(1.0, 1.0), min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0), max_padded_size_ratio=(2.0, 2.0),
pad_color=None, pad_color=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly crops and pads the image. """Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops Given an input image and its bounding boxes, this op first randomly crops
...@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image, ...@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image,
if set as None, it will be set to average color of the randomly if set as None, it will be set to average color of the randomly
cropped image. cropped image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
padded_image: padded image. padded_image: padded image.
...@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image, ...@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image,
area_range=area_range, area_range=area_range,
overlap_thresh=overlap_thresh, overlap_thresh=overlap_thresh,
random_coef=random_coef, random_coef=random_coef,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
cropped_image, cropped_boxes, cropped_labels = result[:3] cropped_image, cropped_boxes, cropped_labels = result[:3]
...@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image, ...@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image,
min_image_size=min_image_size, min_image_size=min_image_size,
max_image_size=max_image_size, max_image_size=max_image_size,
pad_color=pad_color, pad_color=pad_color,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
cropped_padded_output = (padded_image, padded_boxes, cropped_labels) cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
...@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image, ...@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image,
keypoints=None, keypoints=None,
aspect_ratio=1.0, aspect_ratio=1.0,
overlap_thresh=0.3, overlap_thresh=0.3,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly crops an image to the specified aspect ratio. """Randomly crops an image to the specified aspect ratio.
Randomly crops the a portion of the image such that the crop is of the Randomly crops the a portion of the image such that the crop is of the
...@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image, ...@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image,
overlap_thresh: minimum overlap thresh with new cropped overlap_thresh: minimum overlap thresh with new cropped
image to keep the box. image to keep the box.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image, ...@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image,
# offset_height is randomly chosen from [0, offset_height - target_height) # offset_height is randomly chosen from [0, offset_height - target_height)
offset_height = _random_integer(0, orig_height - target_height + 1, seed) offset_height = _random_integer(0, orig_height - target_height + 1, seed)
offset_width = _random_integer(0, orig_width - target_width + 1, seed) offset_width = _random_integer(0, orig_width - target_width + 1, seed)
generator_func = lambda: (offset_height, offset_width)
offset_height, offset_width = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
preprocess_vars_cache)
new_image = tf.image.crop_to_bounding_box( new_image = tf.image.crop_to_bounding_box(
image, offset_height, offset_width, target_height, target_width) image, offset_height, offset_width, target_height, target_width)
...@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image, ...@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image,
aspect_ratio=1.0, aspect_ratio=1.0,
min_padded_size_ratio=(1.0, 1.0), min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0), max_padded_size_ratio=(2.0, 2.0),
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly zero pads an image to the specified aspect ratio. """Randomly zero pads an image to the specified aspect ratio.
Pads the image so that the resulting image will have the specified aspect Pads the image so that the resulting image will have the specified aspect
...@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image, ...@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image,
max_padded_size_ratio: max ratio of padded image height and width to the max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. input image's height and width.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image, ...@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image,
min_scale = tf.maximum(min_height / target_height, min_width / target_width) min_scale = tf.maximum(min_height / target_height, min_width / target_width)
max_scale = tf.minimum(max_height / target_height, max_width / target_width) max_scale = tf.minimum(max_height / target_height, max_width / target_width)
scale = tf.random_uniform([], min_scale, max_scale, seed=seed)
generator_func = functools.partial(tf.random_uniform, [],
min_scale, max_scale, seed=seed)
scale = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO,
preprocess_vars_cache)
target_height = scale * target_height target_height = scale * target_height
target_width = scale * target_width target_width = scale * target_width
...@@ -1549,7 +1868,8 @@ def random_black_patches(image, ...@@ -1549,7 +1868,8 @@ def random_black_patches(image,
max_black_patches=10, max_black_patches=10,
probability=0.5, probability=0.5,
size_to_image_ratio=0.1, size_to_image_ratio=0.1,
random_seed=None): random_seed=None,
preprocess_vars_cache=None):
"""Randomly adds some black patches to the image. """Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size This op adds up to max_black_patches square black patches of a fixed size
...@@ -1566,15 +1886,20 @@ def random_black_patches(image, ...@@ -1566,15 +1886,20 @@ def random_black_patches(image,
box_size = size_to_image_ratio * box_size = size_to_image_ratio *
min(image_width, image_height) min(image_width, image_height)
random_seed: random seed. random_seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image image
""" """
def add_black_patch_to_image(image): def add_black_patch_to_image(image, idx):
"""Function for adding one patch to the image. """Function for adding one patch to the image.
Args: Args:
image: image image: image
idx: counter for number of patches that could have been added
Returns: Returns:
image with a randomly added black box image with a randomly added black box
...@@ -1586,10 +1911,19 @@ def random_black_patches(image, ...@@ -1586,10 +1911,19 @@ def random_black_patches(image,
tf.multiply( tf.multiply(
tf.minimum(tf.to_float(image_height), tf.to_float(image_width)), tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
size_to_image_ratio)) size_to_image_ratio))
normalized_y_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed) generator_func = functools.partial(tf.random_uniform, [], minval=0.0,
normalized_x_min = tf.random_uniform( maxval=(1.0 - size_to_image_ratio),
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed) seed=random_seed)
normalized_y_min = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
preprocess_vars_cache, key=str(idx) + 'y')
normalized_x_min = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
preprocess_vars_cache, key=str(idx) + 'x')
y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height)) y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width)) x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32) black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
...@@ -1599,13 +1933,17 @@ def random_black_patches(image, ...@@ -1599,13 +1933,17 @@ def random_black_patches(image,
return image return image
with tf.name_scope('RandomBlackPatchInImage', values=[image]): with tf.name_scope('RandomBlackPatchInImage', values=[image]):
for _ in range(max_black_patches): for idx in range(max_black_patches):
random_prob = tf.random_uniform( generator_func = functools.partial(tf.random_uniform, [],
[], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed) minval=0.0, maxval=1.0,
dtype=tf.float32, seed=random_seed)
random_prob = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.BLACK_PATCHES,
preprocess_vars_cache, key=idx)
image = tf.cond( image = tf.cond(
tf.greater(random_prob, probability), lambda: image, tf.greater(random_prob, probability), lambda: image,
lambda: add_black_patch_to_image(image)) functools.partial(add_black_patch_to_image, image=image, idx=idx))
return image return image
...@@ -1623,12 +1961,16 @@ def image_to_float(image): ...@@ -1623,12 +1961,16 @@ def image_to_float(image):
return image return image
def random_resize_method(image, target_size): def random_resize_method(image, target_size, preprocess_vars_cache=None):
"""Uses a random resize method to resize the image to target size. """Uses a random resize method to resize the image to target size.
Args: Args:
image: a rank 3 tensor. image: a rank 3 tensor.
target_size: a list of [target_height, target_width] target_size: a list of [target_height, target_width]
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
resized image. resized image.
...@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size): ...@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size):
resized_image = _apply_with_random_selector( resized_image = _apply_with_random_selector(
image, image,
lambda x, method: tf.image.resize_images(x, target_size, method), lambda x, method: tf.image.resize_images(x, target_size, method),
num_cases=4) num_cases=4,
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD)
return resized_image return resized_image
...@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension): ...@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
image_shape = image.get_shape().as_list() image_shape = image.get_shape().as_list()
orig_height = image_shape[0] orig_height = image_shape[0]
orig_width = image_shape[1] orig_width = image_shape[1]
num_channels = image_shape[2]
orig_min_dim = min(orig_height, orig_width) orig_min_dim = min(orig_height, orig_width)
# Calculates the larger of the possible sizes # Calculates the larger of the possible sizes
large_scale_factor = min_dimension / float(orig_min_dim) large_scale_factor = min_dimension / float(orig_min_dim)
...@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension): ...@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
new_size = small_size new_size = small_size
else: else:
new_size = large_size new_size = large_size
return tf.constant(new_size) return tf.constant(new_size + [num_channels])
def _compute_new_dynamic_size(image, min_dimension, max_dimension): def _compute_new_dynamic_size(image, min_dimension, max_dimension):
...@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension): ...@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
image_shape = tf.shape(image) image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0]) orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1]) orig_width = tf.to_float(image_shape[1])
num_channels = image_shape[2]
orig_min_dim = tf.minimum(orig_height, orig_width) orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes # Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32) min_dimension = tf.constant(min_dimension, dtype=tf.float32)
...@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension): ...@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
lambda: small_size, lambda: large_size) lambda: small_size, lambda: large_size)
else: else:
new_size = large_size new_size = large_size
return new_size return tf.stack(tf.unstack(new_size) + [num_channels])
def resize_to_range(image, def resize_to_range(image,
...@@ -1719,7 +2065,8 @@ def resize_to_range(image, ...@@ -1719,7 +2065,8 @@ def resize_to_range(image,
min_dimension=None, min_dimension=None,
max_dimension=None, max_dimension=None,
method=tf.image.ResizeMethod.BILINEAR, method=tf.image.ResizeMethod.BILINEAR,
align_corners=False): align_corners=False,
pad_to_max_dimension=False):
"""Resizes an image so its dimensions are within the provided value. """Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases: The output size can be described by two cases:
...@@ -1740,15 +2087,22 @@ def resize_to_range(image, ...@@ -1740,15 +2087,22 @@ def resize_to_range(image,
BILINEAR. BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False. and output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
Returns: Returns:
A 3D tensor of shape [new_height, new_width, channels], Note that the position of the resized_image_shape changes based on whether
where the image has been resized (with bilinear interpolation) so that masks are present.
min(new_height, new_width) == min_dimension or resized_image: A 3D tensor of shape [new_height, new_width, channels],
max(new_height, new_width) == max_dimension. where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
If masks is not None, also outputs masks: max(new_height, new_width) == max_dimension.
A 3D tensor of shape [num_instances, new_height, new_width] resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width].
resized_image_shape: A 1D tensor of shape [3] containing shape of the
resized image.
Raises: Raises:
ValueError: if the image is not a 3D tensor. ValueError: if the image is not a 3D tensor.
...@@ -1762,16 +2116,27 @@ def resize_to_range(image, ...@@ -1762,16 +2116,27 @@ def resize_to_range(image,
else: else:
new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension) new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
new_image = tf.image.resize_images( new_image = tf.image.resize_images(
image, new_size, method=method, align_corners=align_corners) image, new_size[:-1], method=method, align_corners=align_corners)
result = new_image if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box(
new_image, 0, 0, max_dimension, max_dimension)
result = [new_image]
if masks is not None: if masks is not None:
new_masks = tf.expand_dims(masks, 3) new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor( new_masks = tf.image.resize_images(
new_masks, new_size, align_corners=align_corners) new_masks,
new_size[:-1],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=align_corners)
new_masks = tf.squeeze(new_masks, 3) new_masks = tf.squeeze(new_masks, 3)
result = [new_image, new_masks] if pad_to_max_dimension:
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, max_dimension, max_dimension)
result.append(new_masks)
result.append(new_size)
return result return result
...@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600): ...@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
min_dimension: minimum image dimension. min_dimension: minimum image dimension.
Returns: Returns:
a tuple containing the following: Note that the position of the resized_image_shape changes based on whether
Resized image. A tensor of size [new_height, new_width, channels]. masks are present.
(optional) Resized masks. A tensor of resized_image: A tensor of size [new_height, new_width, channels].
size [num_instances, new_height, new_width]. resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
Raises: Raises:
ValueError: if the image is not a 3D tensor. ValueError: if the image is not a 3D tensor.
...@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600): ...@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]): with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
image_height = tf.shape(image)[0] image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1] image_width = tf.shape(image)[1]
num_channels = tf.shape(image)[2]
min_image_dimension = tf.minimum(image_height, image_width) min_image_dimension = tf.minimum(image_height, image_width)
min_target_dimension = tf.maximum(min_image_dimension, min_dimension) min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
target_ratio = tf.to_float(min_target_dimension) / tf.to_float( target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
...@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600): ...@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
tf.expand_dims(image, axis=0), tf.expand_dims(image, axis=0),
size=[target_height, target_width], size=[target_height, target_width],
align_corners=True) align_corners=True)
result = tf.squeeze(image, axis=0) result = [tf.squeeze(image, axis=0)]
if masks is not None: if masks is not None:
masks = tf.image.resize_nearest_neighbor( masks = tf.image.resize_nearest_neighbor(
tf.expand_dims(masks, axis=3), tf.expand_dims(masks, axis=3),
size=[target_height, target_width], size=[target_height, target_width],
align_corners=True) align_corners=True)
result = (result, tf.squeeze(masks, axis=3)) result.append(tf.squeeze(masks, axis=3))
result.append(tf.stack([target_height, target_width, num_channels]))
return result return result
...@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): ...@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
return tuple(result) return tuple(result)
# TODO: Investigate if instead the function should return None if
# masks is None.
# pylint: disable=g-doc-return-or-yield # pylint: disable=g-doc-return-or-yield
def resize_image(image, def resize_image(image,
masks=None, masks=None,
...@@ -1861,7 +2235,28 @@ def resize_image(image, ...@@ -1861,7 +2235,28 @@ def resize_image(image,
new_width=1024, new_width=1024,
method=tf.image.ResizeMethod.BILINEAR, method=tf.image.ResizeMethod.BILINEAR,
align_corners=False): align_corners=False):
"""See `tf.image.resize_images` for detailed doc.""" """Resizes images to the given height and width.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
new_height: (optional) (scalar) desired height of the image.
new_width: (optional) (scalar) desired width of the image.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
"""
with tf.name_scope( with tf.name_scope(
'ResizeImage', 'ResizeImage',
values=[image, new_height, new_width, method, align_corners]): values=[image, new_height, new_width, method, align_corners]):
...@@ -1869,7 +2264,8 @@ def resize_image(image, ...@@ -1869,7 +2264,8 @@ def resize_image(image,
image, [new_height, new_width], image, [new_height, new_width],
method=method, method=method,
align_corners=align_corners) align_corners=align_corners)
result = new_image image_shape = shape_utils.combined_static_and_dynamic_shape(image)
result = [new_image]
if masks is not None: if masks is not None:
num_instances = tf.shape(masks)[0] num_instances = tf.shape(masks)[0]
new_size = tf.constant([new_height, new_width], dtype=tf.int32) new_size = tf.constant([new_height, new_width], dtype=tf.int32)
...@@ -1886,8 +2282,9 @@ def resize_image(image, ...@@ -1886,8 +2282,9 @@ def resize_image(image,
masks = tf.cond(num_instances > 0, resize_masks_branch, masks = tf.cond(num_instances > 0, resize_masks_branch,
reshape_masks_branch) reshape_masks_branch)
result = [new_image, masks] result.append(masks)
result.append(tf.stack([new_height, new_width, image_shape[2]]))
return result return result
...@@ -1946,7 +2343,7 @@ def rgb_to_gray(image): ...@@ -1946,7 +2343,7 @@ def rgb_to_gray(image):
Returns: Returns:
image: A single channel grayscale image -> [image, height, 1]. image: A single channel grayscale image -> [image, height, 1].
""" """
return tf.image.rgb_to_grayscale(image) return _rgb_to_grayscale(image)
def ssd_random_crop(image, def ssd_random_crop(image,
...@@ -1960,7 +2357,8 @@ def ssd_random_crop(image, ...@@ -1960,7 +2357,8 @@ def ssd_random_crop(image,
area_range=((0.1, 1.0),) * 7, area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7, random_coef=(0.15,) * 7,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper. """Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -1994,6 +2392,10 @@ def ssd_random_crop(image, ...@@ -1994,6 +2392,10 @@ def ssd_random_crop(image,
cropped image, and if it is 1.0, we will always get the cropped image, and if it is 1.0, we will always get the
original image. original image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -2046,14 +2448,17 @@ def ssd_random_crop(image, ...@@ -2046,14 +2448,17 @@ def ssd_random_crop(image,
area_range=area_range[index], area_range=area_range[index],
overlap_thresh=overlap_thresh[index], overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index], random_coef=random_coef[index],
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
result = _apply_with_random_selector_tuples( result = _apply_with_random_selector_tuples(
tuple( tuple(
t for t in (image, boxes, labels, label_scores, masks, keypoints) t for t in (image, boxes, labels, label_scores, masks, keypoints)
if t is not None), if t is not None),
random_crop_selector, random_crop_selector,
num_cases=len(min_object_covered)) num_cases=len(min_object_covered),
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID)
return result return result
...@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image, ...@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio=((1.0, 1.0),) * 6, min_padded_size_ratio=((1.0, 1.0),) * 6,
max_padded_size_ratio=((2.0, 2.0),) * 6, max_padded_size_ratio=((2.0, 2.0),) * 6,
pad_color=(None,) * 6, pad_color=(None,) * 6,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper. """Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image, ...@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image,
if set as None, it will be set to average color of the randomly if set as None, it will be set to average color of the randomly
cropped image. cropped image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: Image shape will be [new_height, new_width, channels]. image: Image shape will be [new_height, new_width, channels].
...@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image, ...@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio=min_padded_size_ratio[index], min_padded_size_ratio=min_padded_size_ratio[index],
max_padded_size_ratio=max_padded_size_ratio[index], max_padded_size_ratio=max_padded_size_ratio[index],
pad_color=pad_color[index], pad_color=pad_color[index],
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
return _apply_with_random_selector_tuples( return _apply_with_random_selector_tuples(
tuple(t for t in (image, boxes, labels, label_scores) if t is not None), tuple(t for t in (image, boxes, labels, label_scores) if t is not None),
random_crop_pad_selector, random_crop_pad_selector,
num_cases=len(min_object_covered)) num_cases=len(min_object_covered),
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID)
def ssd_random_crop_fixed_aspect_ratio( def ssd_random_crop_fixed_aspect_ratio(
...@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio(
area_range=((0.1, 1.0),) * 7, area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7, random_coef=(0.15,) * 7,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper. """Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio(
cropped image, and if it is 1.0, we will always get the cropped image, and if it is 1.0, we will always get the
original image. original image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio(
crop_result = ssd_random_crop( crop_result = ssd_random_crop(
image, boxes, labels, label_scores, masks, keypoints, min_object_covered, image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed) aspect_ratio_range, area_range, overlap_thresh, random_coef, seed,
preprocess_vars_cache)
i = 3 i = 3
new_image, new_boxes, new_labels = crop_result[:i] new_image, new_boxes, new_labels = crop_result[:i]
new_label_scores = None new_label_scores = None
...@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio(
new_masks, new_masks,
new_keypoints, new_keypoints,
aspect_ratio=aspect_ratio, aspect_ratio=aspect_ratio,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
return result return result
...@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
random_coef=(0.15,) * 7, random_coef=(0.15,) * 7,
min_padded_size_ratio=(1.0, 1.0), min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0), max_padded_size_ratio=(2.0, 2.0),
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop and pad preprocessing with default parameters as in SSD paper. """Random crop and pad preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
max_padded_size_ratio: max ratio of padded image height and width to the max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. input image's height and width.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
""" """
crop_result = ssd_random_crop( crop_result = ssd_random_crop(
image, boxes, labels, label_scores, masks, keypoints, min_object_covered, image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed) aspect_ratio_range, area_range, overlap_thresh, random_coef, seed,
preprocess_vars_cache)
i = 3 i = 3
new_image, new_boxes, new_labels = crop_result[:i] new_image, new_boxes, new_labels = crop_result[:i]
new_label_scores = None new_label_scores = None
...@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
aspect_ratio=aspect_ratio, aspect_ratio=aspect_ratio,
min_padded_size_ratio=min_padded_size_ratio, min_padded_size_ratio=min_padded_size_ratio,
max_padded_size_ratio=max_padded_size_ratio, max_padded_size_ratio=max_padded_size_ratio,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
result = list(result) result = list(result)
if new_label_scores is not None: if new_label_scores is not None:
...@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False, ...@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False,
return prep_func_arg_map return prep_func_arg_map
def preprocess(tensor_dict, preprocess_options, func_arg_map=None): def preprocess(tensor_dict,
preprocess_options,
func_arg_map=None,
preprocess_vars_cache=None):
"""Preprocess images and bounding boxes. """Preprocess images and bounding boxes.
Various types of preprocessing (to be implemented) based on the Various types of preprocessing (to be implemented) based on the
...@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None): ...@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
their values. their values.
func_arg_map: mapping from preprocessing functions to arguments that they func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return. expect to receive and return.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc. tensor_dict: which contains the preprocessed images, bounding boxes, etc.
...@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None): ...@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
return tensor_dict[key] if key is not None else None return tensor_dict[key] if key is not None else None
args = [get_arg(a) for a in arg_names] args = [get_arg(a) for a in arg_names]
if (preprocess_vars_cache is not None and
'preprocess_vars_cache' in inspect.getargspec(func).args):
params['preprocess_vars_cache'] = preprocess_vars_cache
results = func(*args, **params) results = func(*args, **params)
if not isinstance(results, (list, tuple)): if not isinstance(results, (list, tuple)):
results = (results,) results = (results,)
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Records previous preprocessing operations and allows them to be repeated.
Used with object_detection.core.preprocessor. Passing a PreprocessorCache
into individual data augmentation functions or the general preprocess() function
will store all randomly generated variables in the PreprocessorCache. When
a preprocessor function is called multiple times with the same
PreprocessorCache object, that function will perform the same augmentation
on all calls.
"""
from collections import defaultdict
class PreprocessorCache(object):
  """Dictionary wrapper storing random variables generated during preprocessing.
  """

  # Constant keys representing different preprocessing functions
  ROTATION90 = 'rotation90'
  HORIZONTAL_FLIP = 'horizontal_flip'
  VERTICAL_FLIP = 'vertical_flip'
  PIXEL_VALUE_SCALE = 'pixel_value_scale'
  IMAGE_SCALE = 'image_scale'
  RGB_TO_GRAY = 'rgb_to_gray'
  ADJUST_BRIGHTNESS = 'adjust_brightness'
  ADJUST_CONTRAST = 'adjust_contrast'
  ADJUST_HUE = 'adjust_hue'
  ADJUST_SATURATION = 'adjust_saturation'
  DISTORT_COLOR = 'distort_color'
  STRICT_CROP_IMAGE = 'strict_crop_image'
  CROP_IMAGE = 'crop_image'
  PAD_IMAGE = 'pad_image'
  CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
  RESIZE_METHOD = 'resize_method'
  PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
  BLACK_PATCHES = 'black_patches'
  ADD_BLACK_PATCH = 'add_black_patch'
  SELECTOR = 'selector'
  SELECTOR_TUPLES = 'selector_tuples'
  SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
  SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'

  # 23 permitted function ids
  _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
                IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
                ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
                CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
                PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
                SELECTOR_TUPLES, SSD_CROP_SELECTOR_ID, SSD_CROP_PAD_SELECTOR_ID]

  def __init__(self):
    # Maps function_id -> {key: value}. Must be a defaultdict(dict) because
    # get() indexes self._history[function_id] directly and relies on
    # auto-creation of an empty inner dict for ids never updated.
    self._history = defaultdict(dict)

  def clear(self):
    """Resets cache."""
    # Bug fix: previously rebound to a plain {}, so get() on a valid but
    # never-updated function id raised KeyError after clear() instead of
    # returning None like a freshly constructed cache.
    self._history = defaultdict(dict)

  def get(self, function_id, key):
    """Gets stored value given a function id and key.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
    Returns:
      value: the corresponding value, expected to be a tensor or
             nested structure of tensors; None if the key is absent.
    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))
    return self._history[function_id].get(key)

  def update(self, function_id, key, value):
    """Adds a value to the dictionary.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
      value: the value to store, expected to be a tensor or nested structure
             of tensors.
    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))
    self._history[function_id][key] = value
...@@ -21,6 +21,7 @@ import six ...@@ -21,6 +21,7 @@ import six
import tensorflow as tf import tensorflow as tf
from object_detection.core import preprocessor from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
if six.PY2: if six.PY2:
...@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase):
def expectedLabelsAfterThresholdingWithMissingScore(self): def expectedLabelsAfterThresholdingWithMissingScore(self):
return tf.constant([2], dtype=tf.float32) return tf.constant([2], dtype=tf.float32)
def testRgbToGrayscale(self):
images = self.createTestImages()
grayscale_images = preprocessor._rgb_to_grayscale(images)
expected_images = tf.image.rgb_to_grayscale(images)
with self.test_session() as sess:
(grayscale_images, expected_images) = sess.run(
[grayscale_images, expected_images])
self.assertAllEqual(expected_images, grayscale_images)
def testNormalizeImage(self): def testNormalizeImage(self):
preprocess_options = [(preprocessor.normalize_image, { preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0, 'original_minval': 0,
...@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase):
rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask]) rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask])
self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten()) self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten())
  def _testPreprocessorCache(self,
                             preprocess_options,
                             test_boxes=False,
                             test_masks=False,
                             test_keypoints=False,
                             num_runs=4):
    """Runs preprocess() num_runs times with one shared cache and checks that
    all runs produce identical outputs.

    Args:
      preprocess_options: list of (function, kwargs) tuples passed through to
        preprocessor.preprocess.
      test_boxes: if True, also feed groundtruth boxes/classes and compare the
        box outputs across runs.
      test_masks: if True, also feed instance masks and compare them.
      test_keypoints: if True, also feed keypoints and compare them.
      num_runs: number of times preprocess() is invoked with the same cache.
    """
    cache = preprocessor_cache.PreprocessorCache()
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    classes = self.createTestLabels()
    masks = self.createTestMasks()
    keypoints = self.createTestKeypoints()
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
        include_instance_masks=test_masks, include_keypoints=test_keypoints)
    out = []
    for i in range(num_runs):
      tensor_dict = {
          fields.InputDataFields.image: images,
      }
      # num_outputs counts how many tensors per run are fetched below; it is
      # the comparison stride for the assertions at the end.
      num_outputs = 1
      if test_boxes:
        tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes
        tensor_dict[fields.InputDataFields.groundtruth_classes] = classes
        num_outputs += 1
      if test_masks:
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
        num_outputs += 1
      if test_keypoints:
        tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
        num_outputs += 1
      out.append(preprocessor.preprocess(
          tensor_dict, preprocess_options, preprocessor_arg_map, cache))
    with self.test_session() as sess:
      # to_run is laid out as num_runs consecutive groups of num_outputs
      # tensors; the append order here must mirror the num_outputs counting
      # above so the strided comparison below lines up.
      to_run = []
      for i in range(num_runs):
        to_run.append(out[i][fields.InputDataFields.image])
        if test_boxes:
          to_run.append(out[i][fields.InputDataFields.groundtruth_boxes])
        if test_masks:
          to_run.append(
              out[i][fields.InputDataFields.groundtruth_instance_masks])
        if test_keypoints:
          to_run.append(out[i][fields.InputDataFields.groundtruth_keypoints])
      out_array = sess.run(to_run)
      # Each output must match the corresponding output of the previous run.
      for i in range(num_outputs, len(out_array)):
        self.assertAllClose(out_array[i], out_array[i - num_outputs])
def testRandomHorizontalFlip(self): def testRandomHorizontalFlip(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})] preprocess_options = [(preprocessor.random_horizontal_flip, {})]
images = self.expectedImagesAfterNormalization() images = self.expectedImagesAfterNormalization()
...@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_) self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_) self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomHorizontalFlipWithCache(self):
keypoint_flip_permutation = self.createKeypointFlipPermutation()
preprocess_options = [
(preprocessor.random_horizontal_flip,
{'keypoint_flip_permutation': keypoint_flip_permutation})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self): def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})] preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3 image_height = 3
...@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_) self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_) self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomVerticalFlipWithCache(self):
keypoint_flip_permutation = self.createKeypointFlipPermutation()
preprocess_options = [
(preprocessor.random_vertical_flip,
{'keypoint_flip_permutation': keypoint_flip_permutation})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomVerticalFlipWithMaskAndKeypoints(self): def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_vertical_flip, {})] preprocess_options = [(preprocessor.random_vertical_flip, {})]
image_height = 3 image_height = 3
...@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_) self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_) self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomRotation90WithCache(self):
preprocess_options = [(preprocessor.random_rotation90, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomRotation90WithMaskAndKeypoints(self): def testRunRandomRotation90WithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_rotation90, {})] preprocess_options = [(preprocessor.random_rotation90, {})]
image_height = 3 image_height = 3
...@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(values_greater_, values_true_) self.assertAllClose(values_greater_, values_true_)
self.assertAllClose(values_less_, values_true_) self.assertAllClose(values_less_, values_true_)
def testRandomPixelValueScaleWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_pixel_value_scale, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testRandomImageScale(self): def testRandomImageScale(self):
preprocess_options = [(preprocessor.random_image_scale, {})] preprocess_options = [(preprocessor.random_image_scale, {})]
images_original = self.createTestImages() images_original = self.createTestImages()
...@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertTrue( self.assertTrue(
images_original_shape_[2] * 2.0 >= images_scaled_shape_[2]) images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
def testRandomImageScaleWithCache(self):
preprocess_options = [(preprocessor.random_image_scale, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomRGBtoGray(self): def testRandomRGBtoGray(self):
preprocess_options = [(preprocessor.random_rgb_to_gray, {})] preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
images_original = self.createTestImages() images_original = self.createTestImages()
...@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(images_g_diff_, image_zero1_) self.assertAllClose(images_g_diff_, image_zero1_)
self.assertAllClose(images_b_diff_, image_zero1_) self.assertAllClose(images_b_diff_, image_zero1_)
def testRandomRGBtoGrayWithCache(self):
preprocess_options = [(
preprocessor.random_rgb_to_gray, {'probability': 0.5})]
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomAdjustBrightness(self): def testRandomAdjustBrightness(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_bright_shape]) [image_original_shape, image_bright_shape])
self.assertAllEqual(image_original_shape_, image_bright_shape_) self.assertAllEqual(image_original_shape_, image_bright_shape_)
def testRandomAdjustBrightnessWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_adjust_brightness, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomAdjustContrast(self): def testRandomAdjustContrast(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_contrast_shape]) [image_original_shape, image_contrast_shape])
self.assertAllEqual(image_original_shape_, image_contrast_shape_) self.assertAllEqual(image_original_shape_, image_contrast_shape_)
def testRandomAdjustContrastWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_adjust_contrast, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomAdjustHue(self): def testRandomAdjustHue(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_hue_shape]) [image_original_shape, image_hue_shape])
self.assertAllEqual(image_original_shape_, image_hue_shape_) self.assertAllEqual(image_original_shape_, image_hue_shape_)
def testRandomAdjustHueWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_adjust_hue, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomDistortColor(self): def testRandomDistortColor(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase):
[images_original_shape, images_distorted_color_shape]) [images_original_shape, images_distorted_color_shape])
self.assertAllEqual(images_original_shape_, images_distorted_color_shape_) self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
def testRandomDistortColorWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_distort_color, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomJitterBoxes(self): def testRandomJitterBoxes(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.random_jitter_boxes, {})) preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
...@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_) self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithCache(self):
preprocess_options = [(preprocessor.random_rgb_to_gray,
{'probability': 0.5}),
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,
}),
(preprocessor.random_crop_image, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testRandomCropImageGrayscale(self): def testRandomCropImageGrayscale(self):
preprocessing_options = [(preprocessor.rgb_to_gray, {}), preprocessing_options = [(preprocessor.rgb_to_gray, {}),
(preprocessor.normalize_image, { (preprocessor.normalize_image, {
...@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase):
self.expectedKeypointsAfterThresholding()]) self.expectedKeypointsAfterThresholding()])
self.assertAllClose(retained_keypoints_, expected_keypoints_) self.assertAllClose(retained_keypoints_, expected_keypoints_)
def testRandomCropToAspectRatioWithCache(self):
preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testRunRandomCropToAspectRatioWithMasks(self): def testRunRandomCropToAspectRatioWithMasks(self):
image = self.createColorfulTestImage() image = self.createColorfulTestImage()
boxes = self.createTestBoxes() boxes = self.createTestBoxes()
...@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_keypoints_.flatten(), self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten()) expected_keypoints.flatten())
def testRandomPadToAspectRatioWithCache(self):
preprocess_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomPadToAspectRatioWithMasks(self): def testRunRandomPadToAspectRatioWithMasks(self):
image = self.createColorfulTestImage() image = self.createColorfulTestImage()
boxes = self.createTestBoxes() boxes = self.createTestBoxes()
...@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_keypoints_.flatten(), self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten()) expected_keypoints.flatten())
def testRandomPadImageWithCache(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,}), (preprocessor.random_pad_image, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomPadImage(self): def testRandomPadImage(self):
preprocessing_options = [(preprocessor.normalize_image, { preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0, 'original_minval': 0,
...@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase):
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1]))) padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropPadImageWithCache(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,}), (preprocessor.random_crop_pad_image, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomCropPadImageWithRandomCoefOne(self): def testRandomCropPadImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, { preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0, 'original_minval': 0,
...@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase):
self.assertEqual(images_shape_[1], padded_images_shape_[1]) self.assertEqual(images_shape_[1], padded_images_shape_[1])
self.assertEqual(2 * images_shape_[2], padded_images_shape_[2]) self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
def testRandomBlackPatchesWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_black_patches, {
'size_to_image_ratio': 0.5
}))
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomBlackPatches(self): def testRandomBlackPatches(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase):
[images_shape, blacked_images_shape]) [images_shape, blacked_images_shape])
self.assertAllEqual(images_shape_, blacked_images_shape_) self.assertAllEqual(images_shape_, blacked_images_shape_)
def testRandomResizeMethodWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_resize_method, {
'target_size': (75, 150)
}))
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomResizeMethod(self): def testRandomResizeMethod(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image( out_image, out_masks, _ = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width) in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image( out_image, out_masks, _ = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width) in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape) in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range( out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim) in_image, min_dimension=min_dim, max_dimension=max_dim)
self.assertAllEqual(out_image.get_shape().as_list(), expected_shape) self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
...@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image = preprocessor.resize_to_range( out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim) in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
with self.test_session() as sess: with self.test_session() as sess:
...@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range( out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape) self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape) self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
...@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range( out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range( out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape) in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range( out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim) in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
...@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_min_dimension( out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
in_image, in_masks, min_dimension=min_dim) in_image, in_masks, min_dimension=min_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_min_dimension( out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
in_image, in_masks, min_dimension=min_dim) in_image, in_masks, min_dimension=min_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual([0, 1, 1, 0, 1], one_hot) self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
def testSSDRandomCropWithCache(self):
preprocess_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testSSDRandomCrop(self): def testSSDRandomCrop(self):
preprocessing_options = [ preprocessing_options = [
(preprocessor.normalize_image, { (preprocessor.normalize_image, {
...@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_) self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatioWithCache(self):
preprocess_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def _testSSDRandomCropFixedAspectRatio(self, def _testSSDRandomCropFixedAspectRatio(self,
include_label_scores, include_label_scores,
include_instance_masks, include_instance_masks,
......
...@@ -57,6 +57,10 @@ class InputDataFields(object): ...@@ -57,6 +57,10 @@ class InputDataFields(object):
groundtruth_keypoints: ground truth keypoints. groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities. groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores. groundtruth_label_scores: groundtruth label scores.
groundtruth_weights: groundtruth weight factor for bounding boxes.
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shapes: true shapes of images in the resized images, as resized
images can be padded with zeros.
""" """
image = 'image' image = 'image'
original_image = 'original_image' original_image = 'original_image'
...@@ -79,10 +83,13 @@ class InputDataFields(object): ...@@ -79,10 +83,13 @@ class InputDataFields(object):
groundtruth_keypoints = 'groundtruth_keypoints' groundtruth_keypoints = 'groundtruth_keypoints'
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities' groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_label_scores = 'groundtruth_label_scores' groundtruth_label_scores = 'groundtruth_label_scores'
groundtruth_weights = 'groundtruth_weights'
num_groundtruth_boxes = 'num_groundtruth_boxes'
true_image_shape = 'true_image_shape'
class DetectionResultFields(object): class DetectionResultFields(object):
"""Naming converntions for storing the output of the detector. """Naming conventions for storing the output of the detector.
Attributes: Attributes:
source_id: source of the original image. source_id: source of the original image.
...@@ -162,6 +169,7 @@ class TfExampleFields(object): ...@@ -162,6 +169,7 @@ class TfExampleFields(object):
object_is_crowd: [DEPRECATED, use object_group_of instead] object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd is the object a single object or a crowd
object_segment_area: the area of the segment. object_segment_area: the area of the segment.
object_weight: a weight factor for the object's bounding box.
instance_masks: instance segmentation masks. instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries. instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask. instance_classes: Classes for each instance segmentation mask.
...@@ -194,6 +202,7 @@ class TfExampleFields(object): ...@@ -194,6 +202,7 @@ class TfExampleFields(object):
object_depiction = 'image/object/depiction' object_depiction = 'image/object/depiction'
object_is_crowd = 'image/object/is_crowd' object_is_crowd = 'image/object/is_crowd'
object_segment_area = 'image/object/segment/area' object_segment_area = 'image/object/segment/area'
object_weight = 'image/object/weight'
instance_masks = 'image/segmentation/object' instance_masks = 'image/segmentation/object'
instance_boundaries = 'image/boundaries/object' instance_boundaries = 'image/boundaries/object'
instance_classes = 'image/segmentation/object/class' instance_classes = 'image/segmentation/object/class'
......
...@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder ...@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder as bcoder from object_detection.core import box_coder as bcoder
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import matcher as mat from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import standard_fields as fields
from object_detection.matchers import argmax_matcher from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher from object_detection.matchers import bipartite_matcher
from object_detection.utils import shape_utils
class TargetAssigner(object): class TargetAssigner(object):
"""Target assigner to compute classification and regression targets.""" """Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder, def __init__(self, similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=1.0, negative_class_weight=1.0, unmatched_cls_target=None):
unmatched_cls_target=None):
"""Construct Object Detection Target Assigner. """Construct Object Detection Target Assigner.
Args: Args:
...@@ -58,10 +58,8 @@ class TargetAssigner(object): ...@@ -58,10 +58,8 @@ class TargetAssigner(object):
anchors. anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors. groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0) anchors (default: 1.0). The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be anchor (and can be empty for scalar targets). This shape must thus be
...@@ -82,7 +80,6 @@ class TargetAssigner(object): ...@@ -82,7 +80,6 @@ class TargetAssigner(object):
self._similarity_calc = similarity_calc self._similarity_calc = similarity_calc
self._matcher = matcher self._matcher = matcher
self._box_coder = box_coder self._box_coder = box_coder
self._positive_class_weight = positive_class_weight
self._negative_class_weight = negative_class_weight self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None: if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32) self._unmatched_cls_target = tf.constant([0], tf.float32)
...@@ -94,7 +91,7 @@ class TargetAssigner(object): ...@@ -94,7 +91,7 @@ class TargetAssigner(object):
return self._box_coder return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None, def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
**params): groundtruth_weights=None, **params):
"""Assign classification and regression targets to each anchor. """Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors For a given set of anchors and groundtruth detections, match anchors
...@@ -113,6 +110,9 @@ class TargetAssigner(object): ...@@ -113,6 +110,9 @@ class TargetAssigner(object):
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1). ground_truth boxes get a positive label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of **params: Additional keyword arguments for specific implementations of
the Matcher. the Matcher.
...@@ -140,14 +140,21 @@ class TargetAssigner(object): ...@@ -140,14 +140,21 @@ class TargetAssigner(object):
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0)) 0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
unmatched_shape_assert = tf.assert_equal( unmatched_shape_assert = shape_utils.assert_shape_equal(
tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target), shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
message='Unmatched class target shape incompatible ' shape_utils.combined_static_and_dynamic_shape(
'with groundtruth labels shape!') self._unmatched_cls_target))
labels_and_box_shapes_assert = tf.assert_equal( labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(), shape_utils.combined_static_and_dynamic_shape(
message='Groundtruth boxes and labels have incompatible shapes!') groundtruth_labels)[:1],
shape_utils.combined_static_and_dynamic_shape(
groundtruth_boxes.get())[:1])
if groundtruth_weights is None:
num_gt_boxes = groundtruth_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = groundtruth_boxes.num_boxes()
groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
with tf.control_dependencies( with tf.control_dependencies(
[unmatched_shape_assert, labels_and_box_shapes_assert]): [unmatched_shape_assert, labels_and_box_shapes_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
...@@ -158,16 +165,16 @@ class TargetAssigner(object): ...@@ -158,16 +165,16 @@ class TargetAssigner(object):
match) match)
cls_targets = self._create_classification_targets(groundtruth_labels, cls_targets = self._create_classification_targets(groundtruth_labels,
match) match)
reg_weights = self._create_regression_weights(match) reg_weights = self._create_regression_weights(match, groundtruth_weights)
cls_weights = self._create_classification_weights( cls_weights = self._create_classification_weights(match,
match, self._positive_class_weight, self._negative_class_weight) groundtruth_weights)
num_anchors = anchors.num_boxes_static() num_anchors = anchors.num_boxes_static()
if num_anchors is not None: if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors) reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors) cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors) reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors) cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match return cls_targets, cls_weights, reg_targets, reg_weights, match
...@@ -198,23 +205,31 @@ class TargetAssigner(object): ...@@ -198,23 +205,31 @@ class TargetAssigner(object):
Returns: Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension] reg_targets: a float32 tensor with shape [N, box_code_dimension]
""" """
matched_anchor_indices = match.matched_column_indices() matched_gt_boxes = match.gather_based_on_match(
unmatched_ignored_anchor_indices = (match. groundtruth_boxes.get(),
unmatched_or_ignored_column_indices()) unmatched_value=tf.zeros(4),
matched_gt_indices = match.matched_row_indices() ignored_value=tf.zeros(4))
matched_anchors = box_list_ops.gather(anchors, matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
matched_anchor_indices) if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
matched_gt_boxes = box_list_ops.gather(groundtruth_boxes, groundtruth_keypoints = groundtruth_boxes.get_field(
matched_gt_indices) fields.BoxListFields.keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxes, matched_keypoints = match.gather_based_on_match(
matched_anchors) groundtruth_keypoints,
unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
matched_keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
match_results_shape = shape_utils.combined_static_and_dynamic_shape(
match.match_results)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets = tf.tile( unmatched_ignored_reg_targets = tf.tile(
self._default_regression_target(), self._default_regression_target(), [match_results_shape[0], 1])
tf.stack([tf.size(unmatched_ignored_anchor_indices), 1])) matched_anchors_mask = match.matched_column_indicator()
reg_targets = tf.dynamic_stitch( reg_targets = tf.where(matched_anchors_mask,
[matched_anchor_indices, unmatched_ignored_anchor_indices], matched_reg_targets,
[matched_reg_targets, unmatched_ignored_reg_targets]) unmatched_ignored_reg_targets)
# TODO: summarize the number of matches on average.
return reg_targets return reg_targets
def _default_regression_target(self): def _default_regression_target(self):
...@@ -245,27 +260,16 @@ class TargetAssigner(object): ...@@ -245,27 +260,16 @@ class TargetAssigner(object):
and groundtruth boxes. and groundtruth boxes.
Returns: Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
which has shape [num_gt_boxes, d_1, d_2, ... d_k]. shape [num_gt_boxes, d_1, d_2, ... d_k].
""" """
matched_anchor_indices = match.matched_column_indices() return match.gather_based_on_match(
unmatched_ignored_anchor_indices = (match. groundtruth_labels,
unmatched_or_ignored_column_indices()) unmatched_value=self._unmatched_cls_target,
matched_gt_indices = match.matched_row_indices() ignored_value=self._unmatched_cls_target)
matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
def _create_regression_weights(self, match, groundtruth_weights):
ones = self._unmatched_cls_target.shape.ndims * [1]
unmatched_ignored_cls_targets = tf.tile(
tf.expand_dims(self._unmatched_cls_target, 0),
tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
cls_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_cls_targets, unmatched_ignored_cls_targets])
return cls_targets
def _create_regression_weights(self, match):
"""Set regression weight for each anchor. """Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this Only positive anchors are set to contribute to the regression loss, so this
...@@ -275,18 +279,18 @@ class TargetAssigner(object): ...@@ -275,18 +279,18 @@ class TargetAssigner(object):
Args: Args:
match: a matcher.Match object that provides a matching between anchors match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes. and groundtruth boxes.
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box.
Returns: Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing a float32 tensor with shape [num_anchors] representing regression weights.
regression weights
""" """
reg_weights = tf.cast(match.matched_column_indicator(), tf.float32) return match.gather_based_on_match(
return reg_weights groundtruth_weights, ignored_value=0., unmatched_value=0.)
def _create_classification_weights(self, def _create_classification_weights(self,
match, match,
positive_class_weight=1.0, groundtruth_weights):
negative_class_weight=1.0):
"""Create classification weights for each anchor. """Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of Positive (matched) anchors are associated with a weight of
...@@ -299,25 +303,23 @@ class TargetAssigner(object): ...@@ -299,25 +303,23 @@ class TargetAssigner(object):
Args: Args:
match: a matcher.Match object that provides a matching between anchors match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes. and groundtruth boxes.
positive_class_weight: weight to be associated to positive anchors groundtruth_weights: a float tensor of shape [M] indicating the weight to
negative_class_weight: weight to be associated to negative anchors assign to all anchors match to a particular groundtruth box.
Returns: Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing a float32 tensor with shape [num_anchors] representing classification
classification weights. weights.
""" """
matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32) return match.gather_based_on_match(
ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32) groundtruth_weights,
unmatched_indicator = 1.0 - matched_indicator - ignore_indicator ignored_value=0.,
cls_weights = (positive_class_weight * matched_indicator unmatched_value=self._negative_class_weight)
+ negative_class_weight * unmatched_indicator)
return cls_weights
def get_box_coder(self): def get_box_coder(self):
"""Get BoxCoder of this TargetAssigner. """Get BoxCoder of this TargetAssigner.
Returns: Returns:
BoxCoder: BoxCoder object. BoxCoder object.
""" """
return self._box_coder return self._box_coder
...@@ -325,7 +327,6 @@ class TargetAssigner(object): ...@@ -325,7 +327,6 @@ class TargetAssigner(object):
# TODO: This method pulls in all the implementation dependencies into # TODO: This method pulls in all the implementation dependencies into
# core. Therefore its best to have this factory method outside of core. # core. Therefore its best to have this factory method outside of core.
def create_target_assigner(reference, stage=None, def create_target_assigner(reference, stage=None,
positive_class_weight=1.0,
negative_class_weight=1.0, negative_class_weight=1.0,
unmatched_cls_target=None): unmatched_cls_target=None):
"""Factory function for creating standard target assigners. """Factory function for creating standard target assigners.
...@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None, ...@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None,
Args: Args:
reference: string referencing the type of TargetAssigner. reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}. stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0) anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
...@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None, ...@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None,
raise ValueError('No valid combination of reference and stage.') raise ValueError('No valid combination of reference and stage.')
return TargetAssigner(similarity_calc, matcher, box_coder, return TargetAssigner(similarity_calc, matcher, box_coder,
positive_class_weight=positive_class_weight,
negative_class_weight=negative_class_weight, negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target=unmatched_cls_target)
...@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None, ...@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None,
def batch_assign_targets(target_assigner, def batch_assign_targets(target_assigner,
anchors_batch, anchors_batch,
gt_box_batch, gt_box_batch,
gt_class_targets_batch): gt_class_targets_batch,
gt_weights_batch=None):
"""Batched assignment of classification and regression targets. """Batched assignment of classification and regression targets.
Args: Args:
...@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner, ...@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner,
each tensor has shape [num_gt_boxes_i, classification_target_size] and each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch. gt_box_batch.
gt_weights_batch: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
Returns: Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors, batch_cls_targets: a tensor with shape [batch_size, num_anchors,
...@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner, ...@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner,
reg_targets_list = [] reg_targets_list = []
reg_weights_list = [] reg_weights_list = []
match_list = [] match_list = []
for anchors, gt_boxes, gt_class_targets in zip( if gt_weights_batch is None:
anchors_batch, gt_box_batch, gt_class_targets_batch): gt_weights_batch = [None] * len(gt_class_targets_batch)
for anchors, gt_boxes, gt_class_targets, gt_weights in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
(cls_targets, cls_weights, reg_targets, (cls_targets, cls_weights, reg_targets,
reg_weights, match) = target_assigner.assign( reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets) anchors, gt_boxes, gt_class_targets, gt_weights)
cls_targets_list.append(cls_targets) cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights) cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets) reg_targets_list.append(reg_targets)
......
...@@ -17,135 +17,238 @@ ...@@ -17,135 +17,238 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.box_coders import mean_stddev_box_coder from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import region_similarity_calculator from object_detection.core import region_similarity_calculator
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as targetassigner from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher from object_detection.matchers import bipartite_matcher
from object_detection.utils import test_case
class TargetAssignerTest(tf.test.TestCase): class TargetAssignerTest(test_case.TestCase):
def test_assign_agnostic(self): def test_assign_agnostic(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners):
matcher = bipartite_matcher.GreedyBipartiteMatcher() similarity_calc = region_similarity_calculator.IouSimilarity()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, unmatched_cls_target=None) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], similarity_calc, matcher, box_coder, unmatched_cls_target=None)
[0.5, 0.5, 1.0, 0.8], anchors_boxlist = box_list.BoxList(anchor_means)
[0, 0.5, .5, 1.0]]) anchors_boxlist.add_field('stddev', anchor_stddevs)
prior_stddevs = tf.constant(3 * [4 * [.1]]) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
priors = box_list.BoxList(prior_means) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
priors.add_field('stddev', prior_stddevs) (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners)) anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]], dtype=np.float32)
anchor_stddevs = np.array(3 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]] exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1] exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0], exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1], [0, 0, -1, 1],
[0, 0, 0, 0]] [0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0] exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
(cls_targets_out, cls_weights_out, reg_targets_out,
result = target_assigner.assign(priors, boxes, num_valid_rows=2) reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result groundtruth_box_corners])
self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_weights_out, exp_reg_weights)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertEquals(cls_targets_out.dtype, np.float32)
match.matched_column_indices()]) self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets) def test_assign_class_agnostic_with_ignored_matches(self):
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_with_ignored_matches(self):
# Note: test is very similar to above. The third box matched with an IOU # Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means # of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0]. # That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected # Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0]. # classification weights are [1, 1, 0].
similarity_calc = region_similarity_calculator.IouSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners):
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_threshold=0.3) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() unmatched_threshold=0.3)
target_assigner = targetassigner.TargetAssigner( box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
similarity_calc, matcher, box_coder) target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], anchors_boxlist = box_list.BoxList(anchor_means)
[0.5, 0.5, 1.0, 0.8], anchors_boxlist.add_field('stddev', anchor_stddevs)
[0.0, 0.5, .9, 1.0]]) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
prior_stddevs = tf.constant(3 * [4 * [.1]]) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
priors = box_list.BoxList(prior_means) (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
priors.add_field('stddev', prior_stddevs) return (cls_targets, cls_weights, reg_targets, reg_weights)
box_corners = [[0.0, 0.0, 0.5, 0.5], anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]] [0.5, 0.5, 1.0, 0.8],
boxes = box_list.BoxList(tf.constant(box_corners)) [0.0, 0.5, .9, 1.0]], dtype=np.float32)
anchor_stddevs = np.array(3 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]], dtype=np.float32)
exp_cls_targets = [[1], [1], [0]] exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0] exp_cls_weights = [1, 1, 0]
exp_reg_targets = [[0, 0, 0, 0], exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1], [0, 0, -1, 1],
[0, 0, 0, 0]] [0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0] exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
result = target_assigner.assign(priors, boxes) groundtruth_box_corners])
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_weights_out, exp_reg_weights)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertEquals(cls_targets_out.dtype, np.float32)
match.matched_column_indices()]) self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets) def test_assign_agnostic_with_keypoints(self):
self.assertAllClose(reg_weights_out, exp_reg_weights) def graph_fn(anchor_means, groundtruth_box_corners,
self.assertAllClose(matching_anchors_out, exp_matching_anchors) groundtruth_keypoints):
self.assertEquals(cls_targets_out.dtype, np.float32) similarity_calc = region_similarity_calculator.IouSimilarity()
self.assertEquals(cls_weights_out.dtype, np.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
self.assertEquals(reg_targets_out.dtype, np.float32) unmatched_threshold=0.5)
self.assertEquals(reg_weights_out.dtype, np.float32) box_coder = keypoint_box_coder.KeypointBoxCoder(
self.assertEquals(matching_anchors_out.dtype, np.int32) num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
anchors_boxlist = box_list.BoxList(anchor_means)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
groundtruth_keypoints)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 1.0],
[0.0, 0.5, .9, 1.0]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.45, 0.45, 0.95, 0.95]],
dtype=np.float32)
groundtruth_keypoints = np.array(
[[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
[[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
-5],
[-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
-11, -7],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means,
groundtruth_box_corners,
groundtruth_keypoints])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0].
def graph_fn(anchor_means, groundtruth_box_corners,
groundtruth_keypoints):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
anchors_boxlist = box_list.BoxList(anchor_means)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
groundtruth_keypoints)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 1.0],
[0.0, 0.5, .9, 1.0]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.45, 0.45, 0.95, 0.95]],
dtype=np.float32)
groundtruth_keypoints = np.array(
[[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
[[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
-5],
[-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
-11, -7],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means,
groundtruth_box_corners,
groundtruth_keypoints])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_multiclass(self): def test_assign_multiclass(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
matcher = bipartite_matcher.GreedyBipartiteMatcher() groundtruth_labels):
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], similarity_calc, matcher, box_coder,
[0.5, 0.5, 1.0, 0.8], unmatched_cls_target=unmatched_cls_target)
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]]) anchors_boxlist = box_list.BoxList(anchor_means)
prior_stddevs = tf.constant(4 * [4 * [.1]]) anchors_boxlist.add_field('stddev', anchor_stddevs)
priors = box_list.BoxList(prior_means) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
priors.add_field('stddev', prior_stddevs) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels)
box_corners = [[0.0, 0.0, 0.5, 0.5], (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
[0.5, 0.5, 0.9, 0.9], return (cls_targets, cls_weights, reg_targets, reg_weights)
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners)) anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], [0, 0.5, .5, 1.0],
[0, 0, 0, 0, 0, 1, 0], [.75, 0, 1.0, .25]], dtype=np.float32)
[0, 0, 0, 1, 0, 0, 0]], tf.float32) anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0], exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0],
...@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, -.5, .2]] [0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1] exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass_unequal_class_weights(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=0.5,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], (cls_targets_out, cls_weights_out, reg_targets_out,
[0.5, 0.5, 1.0, 0.8], reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
[0, 0.5, .5, 1.0], groundtruth_box_corners,
[.75, 0, 1.0, .25]]) groundtruth_labels])
prior_stddevs = tf.constant(4 * [4 * [.1]]) self.assertAllClose(cls_targets_out, exp_cls_targets)
priors = box_list.BoxList(prior_means) self.assertAllClose(cls_weights_out, exp_cls_weights)
priors.add_field('stddev', prior_stddevs) self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
box_corners = [[0.0, 0.0, 0.5, 0.5], self.assertEquals(cls_targets_out.dtype, np.float32)
[0.5, 0.5, 0.9, 0.9], self.assertEquals(cls_weights_out.dtype, np.float32)
[.75, 0, .95, .27]] self.assertEquals(reg_targets_out.dtype, np.float32)
boxes = box_list.BoxList(tf.constant(box_corners)) self.assertEquals(reg_weights_out.dtype, np.float32)
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], def test_assign_multiclass_with_groundtruth_weights(self):
[0, 0, 0, 0, 0, 1, 0], def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
[0, 0, 0, 1, 0, 0, 0]], tf.float32) groundtruth_labels, groundtruth_weights):
similarity_calc = region_similarity_calculator.IouSimilarity()
exp_cls_weights = [1, 1, .5, 1] matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
result = target_assigner.assign(priors, boxes, groundtruth_labels, unmatched_threshold=0.5)
num_valid_rows=3) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
(_, cls_weights, _, _, _) = result unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
with self.test_session() as sess: target_assigner = targetassigner.TargetAssigner(
cls_weights_out = sess.run(cls_weights) similarity_calc, matcher, box_coder,
self.assertAllClose(cls_weights_out, exp_cls_weights) unmatched_cls_target=unmatched_cls_target)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels,
groundtruth_weights)
(_, cls_weights, _, reg_weights, _) = result
return (cls_weights, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]], dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)
exp_cls_weights = [0.3, 0., 1, 0.5] # background class gets weight of 1.
exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0.
(cls_weights_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners,
groundtruth_labels,
groundtruth_weights])
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_assign_multidimensional_class_targets(self): def test_assign_multidimensional_class_targets(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
matcher = bipartite_matcher.GreedyBipartiteMatcher() groundtruth_labels):
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target=unmatched_cls_target)
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], target_assigner = targetassigner.TargetAssigner(
[0.5, 0.5, 1.0, 0.8], similarity_calc, matcher, box_coder,
[0, 0.5, .5, 1.0], unmatched_cls_target=unmatched_cls_target)
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]]) anchors_boxlist = box_list.BoxList(anchor_means)
priors = box_list.BoxList(prior_means) anchors_boxlist.add_field('stddev', anchor_stddevs)
priors.add_field('stddev', prior_stddevs) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
box_corners = [[0.0, 0.0, 0.5, 0.5], groundtruth_labels)
[0.5, 0.5, 0.9, 0.9], (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
[.75, 0, .95, .27]] return (cls_targets, cls_weights, reg_targets, reg_weights)
boxes = box_list.BoxList(tf.constant(box_corners))
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
groundtruth_labels = tf.constant([[[0, 1], [1, 0]], [0.5, 0.5, 1.0, 0.8],
[[1, 0], [0, 1]], [0, 0.5, .5, 1.0],
[[0, 1], [1, .5]]], tf.float32) [.75, 0, 1.0, .25]], dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], np.float32)
exp_cls_targets = [[[0, 1], [1, 0]], exp_cls_targets = [[[0, 1], [1, 0]],
[[1, 0], [0, 1]], [[1, 0], [0, 1]],
...@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, -.5, .2]] [0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1] exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
result = target_assigner.assign(priors, boxes, groundtruth_labels, groundtruth_box_corners,
num_valid_rows=3) groundtruth_labels])
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_weights_out, exp_reg_weights)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertEquals(cls_targets_out.dtype, np.float32)
match.matched_column_indices()]) self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_empty_groundtruth(self): def test_assign_empty_groundtruth(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
matcher = bipartite_matcher.GreedyBipartiteMatcher() groundtruth_labels):
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
anchors_boxlist = box_list.BoxList(anchor_means)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], anchors_boxlist.add_field('stddev', anchor_stddevs)
[0.5, 0.5, 1.0, 0.8], groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
[0, 0.5, .5, 1.0], target_assigner = targetassigner.TargetAssigner(
[.75, 0, 1.0, .25]]) similarity_calc, matcher, box_coder,
prior_stddevs = tf.constant(4 * [4 * [.1]]) unmatched_cls_target=unmatched_cls_target)
priors = box_list.BoxList(prior_means) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
priors.add_field('stddev', prior_stddevs) groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]]) return (cls_targets, cls_weights, reg_targets, reg_weights)
box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
boxes = box_list.BoxList(box_corners) groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
groundtruth_labels = np.zeros((0, 3), dtype=np.float32)
groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32) anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3]) [0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]],
dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
exp_cls_targets = [[0, 0, 0], exp_cls_targets = [[0, 0, 0],
[0, 0, 0], [0, 0, 0],
[0, 0, 0], [0, 0, 0],
...@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0]] [0, 0, 0, 0]]
exp_reg_weights = [0, 0, 0, 0] exp_reg_weights = [0, 0, 0, 0]
exp_matching_anchors = [] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
result = target_assigner.assign(priors, boxes, groundtruth_labels) groundtruth_box_corners,
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result groundtruth_labels])
with self.test_session() as sess: self.assertAllClose(cls_targets_out, exp_cls_targets)
(cls_targets_out, cls_weights_out, self.assertAllClose(cls_weights_out, exp_cls_weights)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_targets_out, exp_reg_targets)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertAllClose(reg_weights_out, exp_reg_weights)
match.matched_column_indices()]) self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self): def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
...@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase):
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32) [0, 0, 0, 1, 0, 0, 0]], tf.float32)
result = target_assigner.assign(priors, boxes, groundtruth_labels, with self.assertRaisesRegexp(ValueError, 'Unequal shapes'):
num_valid_rows=3) target_assigner.assign(priors, boxes, groundtruth_labels,
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result num_valid_rows=3)
with self.test_session() as sess:
with self.assertRaisesWithPredicateMatch(
tf.errors.InvalidArgumentError,
'Groundtruth boxes and labels have incompatible shapes!'):
sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
def test_raises_error_on_invalid_groundtruth_labels(self): def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
...@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase):
[0.5, 0.5, 0.9, 0.9], [0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]] [.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners)) boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32) groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
...@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase):
num_valid_rows=3) num_valid_rows=3)
class BatchTargetAssignerTest(tf.test.TestCase): class BatchTargetAssignerTest(test_case.TestCase):
def _get_agnostic_target_assigner(self): def _get_agnostic_target_assigner(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
return targetassigner.TargetAssigner( return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None) unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes): def _get_multi_class_target_assigner(self, num_classes):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32) unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
return targetassigner.TargetAssigner( return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions): def _get_multi_dimensional_target_assigner(self, target_dimensions):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant(np.zeros(target_dimensions), unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
tf.float32) tf.float32)
return targetassigner.TargetAssigner( return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target=unmatched_cls_target)
def test_batch_assign_targets(self): def test_batch_assign_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
box_list2 = box_list.BoxList(tf.constant( groundtruth_boxlist2):
[[0, 0.25123152, 1, 1], box_list1 = box_list.BoxList(groundtruth_boxlist1)
[0.015789, 0.0985, 0.55789, 0.3842]] box_list2 = box_list.BoxList(groundtruth_boxlist2)
)) gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
gt_box_batch = [box_list1, box_list2] anchors_boxlist = box_list.BoxList(anchor_means)
gt_class_targets = [None, None] anchors_boxlist.add_field('stddev', anchor_stddevs)
agnostic_target_assigner = self._get_agnostic_target_assigner()
prior_means = tf.constant([[0, 0, .25, .25], (cls_targets, cls_weights, reg_targets, reg_weights,
[0, .25, 1, 1], _) = targetassigner.batch_assign_targets(
[0, .1, .5, .5], agnostic_target_assigner, anchors_boxlist, gt_box_batch,
[.75, .75, 1, 1]]) gt_class_targets)
prior_stddevs = tf.constant([[.1, .1, .1, .1], return (cls_targets, cls_weights, reg_targets, reg_weights)
[.1, .1, .1, .1],
[.1, .1, .1, .1], groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
[.1, .1, .1, .1]]) groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
priors = box_list.BoxList(prior_means) [0.015789, 0.0985, 0.55789, 0.3842]],
priors.add_field('stddev', prior_stddevs) dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5], exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0], [0, 0, 0, 0],
...@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase): ...@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[[0], [1], [1], [0]]] [[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0], exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]] [0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multiclass_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_boxlist1,
groundtruth_boxlist2])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32) def test_batch_assign_multiclass_targets(self):
class_targets2 = tf.constant([[0, 0, 0, 1], def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
[0, 0, 1, 0]], tf.float32) groundtruth_boxlist2, class_targets1, class_targets2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [class_targets1, class_targets2]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]],
dtype=np.float32)
class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
class_targets2 = np.array([[0, 0, 0, 1],
[0, 0, 1, 0]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
gt_class_targets = [class_targets1, class_targets2] exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]],
[[1, 0, 0, 0],
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
prior_means = tf.constant([[0, 0, .25, .25], (cls_targets_out, cls_weights_out, reg_targets_out,
[0, .25, 1, 1], reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
[0, .1, .5, .5], groundtruth_boxlist1,
[.75, .75, 1, 1]]) groundtruth_boxlist2,
prior_stddevs = tf.constant([[.1, .1, .1, .1], class_targets1,
[.1, .1, .1, .1], class_targets2])
[.1, .1, .1, .1], self.assertAllClose(cls_targets_out, exp_cls_targets)
[.1, .1, .1, .1]]) self.assertAllClose(cls_weights_out, exp_cls_weights)
priors = box_list.BoxList(prior_means) self.assertAllClose(reg_targets_out, exp_reg_targets)
priors.add_field('stddev', prior_stddevs) self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_batch_assign_multiclass_targets_with_padded_groundtruth(self):
def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2, class_targets1, class_targets2,
groundtruth_weights1, groundtruth_weights2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [class_targets1, class_targets2]
gt_weights = [groundtruth_weights1, groundtruth_weights2]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets, gt_weights)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2],
[0., 0., 0., 0.]], dtype=np.float32)
groundtruth_weights1 = np.array([1, 0], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842],
[0, 0, 0, 0]],
dtype=np.float32)
groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32)
class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32)
class_targets2 = np.array([[0, 0, 0, 1],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5], exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 0, 0, 0,],
[0, 0, 0, 0]], [0, 0, 0, 0,],],
[[0, 0, 0, 0], [[0, 0, 0, 0,],
[0, 0.01231521, 0, 0], [0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987], [0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]] [0, 0, 0, 0]]]
...@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase): ...@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[1, 0, 0, 0]]] [1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0], exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]] [0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
multiclass_target_assigner = self._get_multi_class_target_assigner( groundtruth_boxlist1,
num_classes=3) groundtruth_boxlist2,
class_targets1,
(cls_targets, cls_weights, reg_targets, reg_weights, class_targets2,
match_list) = targetassigner.batch_assign_targets( groundtruth_weights1,
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets) groundtruth_weights2])
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
match_out_0, match_out_1) = sess.run([ self.assertAllClose(reg_weights_out, exp_reg_weights)
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multidimensional_targets(self): def test_batch_assign_multidimensional_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2, class_targets1, class_targets2):
box_list2 = box_list.BoxList(tf.constant( box_list1 = box_list.BoxList(groundtruth_boxlist1)
[[0, 0.25123152, 1, 1], box_list2 = box_list.BoxList(groundtruth_boxlist2)
[0.015789, 0.0985, 0.55789, 0.3842]] gt_box_batch = [box_list1, box_list2]
)) gt_class_targets = [class_targets1, class_targets2]
anchors_boxlist = box_list.BoxList(anchor_means)
gt_box_batch = [box_list1, box_list2] anchors_boxlist.add_field('stddev', anchor_stddevs)
class_targets1 = tf.constant([[[0, 1, 1], multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
[1, 1, 0]]], tf.float32) target_dimensions=(2, 3))
class_targets2 = tf.constant([[[0, 1, 1], (cls_targets, cls_weights, reg_targets, reg_weights,
[1, 1, 0]], _) = targetassigner.batch_assign_targets(
[[0, 0, 1], multiclass_target_assigner, anchors_boxlist, gt_box_batch,
[0, 0, 1]]], tf.float32) gt_class_targets)
return (cls_targets, cls_weights, reg_targets, reg_weights)
gt_class_targets = [class_targets1, class_targets2]
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
prior_means = tf.constant([[0, 0, .25, .25], groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0, .25, 1, 1], [0.015789, 0.0985, 0.55789, 0.3842]],
[0, .1, .5, .5], dtype=np.float32)
[.75, .75, 1, 1]]) class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
prior_stddevs = tf.constant([[.1, .1, .1, .1], class_targets2 = np.array([[0, 0, 0, 1],
[.1, .1, .1, .1], [0, 0, 1, 0]], dtype=np.float32)
[.1, .1, .1, .1], class_targets1 = np.array([[[0, 1, 1],
[.1, .1, .1, .1]]) [1, 1, 0]]], dtype=np.float32)
priors = box_list.BoxList(prior_means) class_targets2 = np.array([[[0, 1, 1],
priors.add_field('stddev', prior_stddevs) [1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5], exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 0, 0, 0,],
[0, 0, 0, 0]], [0, 0, 0, 0,],],
[[0, 0, 0, 0], [[0, 0, 0, 0,],
[0, 0.01231521, 0, 0], [0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987], [0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]] [0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1], exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]] [1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.], exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]], [1., 1., 0.]],
[[0., 0., 0.], [[0., 0., 0.],
...@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase): ...@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[0., 0., 0.]]]] [0., 0., 0.]]]]
exp_reg_weights = [[1, 0, 0, 0], exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]] [0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
multiclass_target_assigner = self._get_multi_dimensional_target_assigner( groundtruth_boxlist1,
target_dimensions=(2, 3)) groundtruth_boxlist2,
class_targets1,
(cls_targets, cls_weights, reg_targets, reg_weights, class_targets2])
match_list) = targetassigner.batch_assign_targets( self.assertAllClose(cls_targets_out, exp_cls_targets)
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets) self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) self.assertAllClose(reg_targets_out, exp_reg_targets)
with self.test_session() as sess: self.assertAllClose(reg_weights_out, exp_reg_weights)
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_empty_groundtruth(self): def test_batch_assign_empty_groundtruth(self):
box_coords_expanded = tf.zeros((1, 4), tf.float32)
box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
box_list1 = box_list.BoxList(box_coords)
gt_box_batch = [box_list1]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
gt_class_targets):
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
gt_box_batch = [groundtruth_boxlist]
gt_class_targets_batch = [gt_class_targets]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist,
gt_box_batch, gt_class_targets_batch)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, 0, 0], exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]] [0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]] exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0], exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]] [1, 0, 0, 0]]]
exp_reg_weights = [[0, 0]] exp_reg_weights = [[0, 0]]
exp_match_0 = []
num_classes = 3 num_classes = 3
pad = 1 pad = 1
gt_class_targets = tf.zeros((0, num_classes + pad)) gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32)
gt_class_targets_batch = [gt_class_targets]
(cls_targets_out, cls_weights_out, reg_targets_out,
multiclass_target_assigner = self._get_multi_class_target_assigner( reg_weights_out) = self.execute(
num_classes=3) graph_fn, [anchor_means, anchor_stddevs, groundtruth_box_corners,
gt_class_targets])
(cls_targets, cls_weights, reg_targets, reg_weights, self.assertAllClose(cls_targets_out, exp_cls_targets)
match_list) = targetassigner.batch_assign_targets( self.assertAllClose(cls_weights_out, exp_cls_weights)
multiclass_target_assigner, priors, self.assertAllClose(reg_targets_out, exp_reg_targets)
gt_box_batch, gt_class_targets_batch) self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
class CreateTargetAssignerTest(tf.test.TestCase): class CreateTargetAssignerTest(tf.test.TestCase):
......
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"pet_label_map.pbtxt",
])
...@@ -12,9 +12,10 @@ py_library( ...@@ -12,9 +12,10 @@ py_library(
srcs = ["tf_example_decoder.py"], srcs = ["tf_example_decoder.py"],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder", "//tensorflow/models/research/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields", "//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
...@@ -24,6 +25,7 @@ py_test( ...@@ -24,6 +25,7 @@ py_test(
deps = [ deps = [
":tf_example_decoder", ":tf_example_decoder",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields", "//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
], ],
) )
...@@ -22,6 +22,7 @@ import tensorflow as tf ...@@ -22,6 +22,7 @@ import tensorflow as tf
from object_detection.core import data_decoder from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util from object_detection.utils import label_map_util
slim_example_decoder = tf.contrib.slim.tfexample_decoder slim_example_decoder = tf.contrib.slim.tfexample_decoder
...@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder):
def __init__(self, def __init__(self,
load_instance_masks=False, load_instance_masks=False,
instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
label_map_proto_file=None, label_map_proto_file=None,
use_display_name=False): use_display_name=False):
"""Constructor sets keys_to_features and items_to_handlers. """Constructor sets keys_to_features and items_to_handlers.
Args: Args:
load_instance_masks: whether or not to load and handle instance masks. load_instance_masks: whether or not to load and handle instance masks.
instance_mask_type: type of instance masks. Options are provided in
input_reader.proto. This is only used if `load_instance_masks` is True.
label_map_proto_file: a file path to a label_map_proto_file: a file path to a
object_detection.protos.StringIntLabelMap proto. If provided, then the object_detection.protos.StringIntLabelMap proto. If provided, then the
mapped IDs of 'image/object/class/text' will take precedence over the mapped IDs of 'image/object/class/text' will take precedence over the
...@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
use_display_name: whether or not to use the `display_name` for label use_display_name: whether or not to use the `display_name` for label
mapping (instead of `name`). Only used if label_map_proto_file is mapping (instead of `name`). Only used if label_map_proto_file is
provided. provided.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
""" """
self.keys_to_features = { self.keys_to_features = {
'image/encoded': 'image/encoded':
...@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf.VarLenFeature(tf.int64), tf.VarLenFeature(tf.int64),
'image/object/group_of': 'image/object/group_of':
tf.VarLenFeature(tf.int64), tf.VarLenFeature(tf.int64),
'image/object/weight':
tf.VarLenFeature(tf.float32),
} }
self.items_to_handlers = { self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image( fields.InputDataFields.image: slim_example_decoder.Image(
...@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields.InputDataFields.groundtruth_difficult: ( fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')), slim_example_decoder.Tensor('image/object/difficult')),
fields.InputDataFields.groundtruth_group_of: ( fields.InputDataFields.groundtruth_group_of: (
slim_example_decoder.Tensor('image/object/group_of')) slim_example_decoder.Tensor('image/object/group_of')),
fields.InputDataFields.groundtruth_weights: (
slim_example_decoder.Tensor('image/object/weight')),
} }
if load_instance_masks: if load_instance_masks:
self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.float32) if instance_mask_type in (input_reader_pb2.DEFAULT,
self.items_to_handlers[ input_reader_pb2.NUMERICAL_MASKS):
fields.InputDataFields.groundtruth_instance_masks] = ( self.keys_to_features['image/object/mask'] = (
slim_example_decoder.ItemHandlerCallback( tf.VarLenFeature(tf.float32))
['image/object/mask', 'image/height', 'image/width'], self.items_to_handlers[
self._reshape_instance_masks)) fields.InputDataFields.groundtruth_instance_masks] = (
# TODO: Add label_handler that decodes from 'image/object/class/text' slim_example_decoder.ItemHandlerCallback(
# primarily after the recent tf.contrib.slim changes make into a release ['image/object/mask', 'image/height', 'image/width'],
# supported by cloudml. self._reshape_instance_masks))
label_handler = slim_example_decoder.Tensor('image/object/class/label') elif instance_mask_type == input_reader_pb2.PNG_MASKS:
self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
self.items_to_handlers[
fields.InputDataFields.groundtruth_instance_masks] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/mask', 'image/height', 'image/width'],
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if label_map_proto_file:
label_map = label_map_util.get_label_map_dict(label_map_proto_file,
use_display_name)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(label_map.keys())),
values=tf.constant(list(label_map.values()), dtype=tf.int64)),
default_value=-1)
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
label_handler = slim_example_decoder.BackupHandler(
slim_example_decoder.LookupTensor(
'image/object/class/text', table, default_value=''),
slim_example_decoder.Tensor('image/object/class/label'))
else:
label_handler = slim_example_decoder.Tensor('image/object/class/label')
self.items_to_handlers[ self.items_to_handlers[
fields.InputDataFields.groundtruth_classes] = label_handler fields.InputDataFields.groundtruth_classes] = label_handler
...@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None, 4] containing box corners. [None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes. [None] containing classes for the boxes.
fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
shape [None] indicating the weights of groundtruth boxes.
fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
the number of groundtruth_boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing containing object mask area in pixel squared. [None] containing containing object mask area in pixel squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd. [None] indicating if the boxes enclose a crowd.
Optional: Optional:
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances. [None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
[None] indicating if the boxes represent `group_of` instances. [None] indicating if the boxes represent `group_of` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
shape [None, None, None] containing instance masks. shape [None, None, None] containing instance masks.
""" """
serialized_example = tf.reshape(tf_example_string_tensor, shape=[]) serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
...@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd = fields.InputDataFields.groundtruth_is_crowd is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
def default_groundtruth_weights():
return tf.ones(
[tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
dtype=tf.float32)
tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
tf.greater(
tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
default_groundtruth_weights)
return tensor_dict return tensor_dict
def _reshape_instance_masks(self, keys_to_tensors): def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks. """Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height, The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory. width].
Args: Args:
keys_to_tensors: a dictionary from keys to tensors. keys_to_tensors: a dictionary from keys to tensors.
...@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
masks = tf.sparse_tensor_to_dense(masks) masks = tf.sparse_tensor_to_dense(masks)
masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape) masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape)
return tf.cast(masks, tf.float32) return tf.cast(masks, tf.float32)
def _decode_png_instance_masks(self, keys_to_tensors):
"""Decode PNG instance segmentation masks and stack into dense tensor.
The instance segmentation masks are reshaped to [num_instances, height,
width].
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float tensor of shape [num_instances, height, width] with values
in {0, 1}.
"""
def decode_png_mask(image_buffer):
image = tf.squeeze(
tf.image.decode_image(image_buffer, channels=1), axis=2)
image.set_shape([None, None])
image = tf.to_float(tf.greater(image, 0))
return image
png_masks = keys_to_tensors['image/object/mask']
height = keys_to_tensors['image/height']
width = keys_to_tensors['image/width']
if isinstance(png_masks, tf.SparseTensor):
png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='')
return tf.cond(
tf.greater(tf.size(png_masks), 0),
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.to_int32(tf.stack([0, height, width]))))
...@@ -21,6 +21,7 @@ import tensorflow as tf ...@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
class TfExampleDecoderTest(tf.test.TestCase): class TfExampleDecoderTest(tf.test.TestCase):
...@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self): def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg) decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self): def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg), 'image/encoded': self._BytesFeature(encoded_jpeg),
...@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename]) self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self): def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png') encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png') decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image]) self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodePngInstanceMasks(self):
image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
encoded_masks = [encoded_png_1, encoded_png_2]
decoded_masks = np.stack([decoded_png_1, decoded_png_2])
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/mask': self._BytesFeature(encoded_masks)
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
decoded_masks,
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
def testDecodeEmptyPngInstanceMasks(self):
image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
encoded_masks = []
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/mask': self._BytesFeature(encoded_masks),
'image/height': self._Int64Feature([10]),
'image/width': self._Int64Feature([10]),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
[0, 10, 10])
def testDecodeBoundingBox(self): def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0] bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0] bbox_xmins = [1.0, 5.0]
...@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_ymaxs, bbox_xmaxs]).transpose() bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes, self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes]) tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
def testDecodeDefaultGroundtruthWeights(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
np.ones(2, dtype=np.float32))
def testDecodeObjectLabel(self): def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1] bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(bbox_classes, self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes]) tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeObjectLabelNoText(self):
    """Numeric 'image/object/class/label' is used even when a map is given."""
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    # Only numeric labels; no 'image/object/class/text' feature is written.
    bbox_classes = [1, 2]
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': self._BytesFeature(encoded_jpeg),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/class/label': self._Int64Feature(bbox_classes),
    })).SerializeToString()
    label_map_string = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    # Statically-unknown number of objects.
    self.assertAllEqual((tensor_dict[
        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
                        [None])
    # The label-map lookup table must be initialized before evaluation.
    init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(init)
      tensor_dict = sess.run(tensor_dict)
      # Numeric labels pass through unchanged.
      self.assertAllEqual(bbox_classes,
                          tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelUnrecognizedName(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes_text = ['cat', 'cheetah']
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
'image/format':
self._BytesFeature('jpeg'),
'image/object/class/text':
self._BytesFeature(bbox_classes_text),
})).SerializeToString()
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
f.write(label_map_string)
example_decoder = tf_example_decoder.TfExampleDecoder(
label_map_proto_file=label_map_path)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
.get_shape().as_list()), [None])
with self.test_session() as sess:
sess.run(tf.tables_initializer())
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([2, -1],
tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeObjectLabelWithMapping(self):
    """Class-name strings are mapped to ids via the supplied label map."""
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    # Only textual class names; the decoder must resolve them to ids.
    bbox_classes_text = ['cat', 'dog']
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    self._BytesFeature(encoded_jpeg),
                'image/format':
                    self._BytesFeature('jpeg'),
                'image/object/class/text':
                    self._BytesFeature(bbox_classes_text),
            })).SerializeToString()
    # Ids are intentionally non-sequential to prove real lookup happens.
    label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                         .get_shape().as_list()), [None])
    with self.test_session() as sess:
      # Initialize the hash table backing the name -> id lookup.
      sess.run(tf.tables_initializer())
      tensor_dict = sess.run(tensor_dict)
      # 'cat' -> 3, 'dog' -> 1 per the label map above.
      self.assertAllEqual([3, 1],
                          tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectArea(self): def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.] object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[fields.InputDataFields.groundtruth_area]) tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self): def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1] object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_is_crowd]) fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self): def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1] object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_difficult]) fields.InputDataFields.groundtruth_difficult])
def testDecodeObjectGroupOf(self): def testDecodeObjectGroupOf(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_group_of = [0, 1] object_group_of = [0, 1]
example = tf.train.Example(features=tf.train.Features( example = tf.train.Example(features=tf.train.Features(
...@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase):
[bool(item) for item in object_group_of], [bool(item) for item in object_group_of],
tensor_dict[fields.InputDataFields.groundtruth_group_of]) tensor_dict[fields.InputDataFields.groundtruth_group_of])
def testDecodeObjectWeight(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_weights = [0.75, 1.0]
example = tf.train.Example(features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/weight': self._FloatFeature(object_weights),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
object_weights,
tensor_dict[fields.InputDataFields.groundtruth_weights])
def testDecodeInstanceSegmentation(self): def testDecodeInstanceSegmentation(self):
num_instances = 4 num_instances = 4
image_height = 5 image_height = 5
image_width = 3 image_width = 3
# Randomly generate image. # Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height, image_tensor = np.random.randint(256, size=(image_height,
image_width, image_width,
3)).astype(np.uint8) 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
...@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_height = 5 image_height = 5
image_width = 3 image_width = 3
# Randomly generate image. # Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height, image_tensor = np.random.randint(256, size=(image_height,
image_width, image_width,
3)).astype(np.uint8) 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
......
# Tensorflow Object Detection API: main runnables. # Tensorflow Object Detection API: dataset tools.
package( package(
default_visibility = ["//visibility:public"], default_visibility = ["//visibility:public"],
...@@ -8,18 +8,43 @@ licenses(["notice"]) ...@@ -8,18 +8,43 @@ licenses(["notice"])
# Apache 2.0 # Apache 2.0
# Converter from raw COCO annotation JSON to TFRecord; depends on
# pycocotools for mask decoding and PIL for image handling.
py_binary(
    name = "create_coco_tf_record",
    srcs = [
        "create_coco_tf_record.py",
    ],
    deps = [
        "//PIL:pil",
        "//pycocotools",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)

# Unit tests for the COCO converter above.
py_test(
    name = "create_coco_tf_record_test",
    srcs = [
        "create_coco_tf_record_test.py",
    ],
    deps = [
        ":create_coco_tf_record",
        "//tensorflow",
    ],
)
py_binary( py_binary(
name = "create_kitti_tf_record", name = "create_kitti_tf_record",
srcs = [ srcs = [
"create_kitti_tf_record.py", "create_kitti_tf_record.py",
], ],
deps = [ deps = [
"//third_party/py/PIL:pil", "//PIL:pil",
"//third_party/py/lxml", "//lxml",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
"//tensorflow_models/object_detection/utils:np_box_ops", "//tensorflow/models/research/object_detection/utils:np_box_ops",
], ],
) )
...@@ -40,11 +65,11 @@ py_binary( ...@@ -40,11 +65,11 @@ py_binary(
"create_pascal_tf_record.py", "create_pascal_tf_record.py",
], ],
deps = [ deps = [
"//third_party/py/PIL:pil", "//PIL:pil",
"//third_party/py/lxml", "//lxml",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
...@@ -65,11 +90,11 @@ py_binary( ...@@ -65,11 +90,11 @@ py_binary(
"create_pet_tf_record.py", "create_pet_tf_record.py",
], ],
deps = [ deps = [
"//third_party/py/PIL:pil", "//PIL:pil",
"//third_party/py/lxml", "//lxml",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
...@@ -78,8 +103,8 @@ py_library( ...@@ -78,8 +103,8 @@ py_library(
srcs = ["oid_tfrecord_creation.py"], srcs = ["oid_tfrecord_creation.py"],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields", "//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
], ],
) )
...@@ -88,9 +113,9 @@ py_test( ...@@ -88,9 +113,9 @@ py_test(
srcs = ["oid_tfrecord_creation_test.py"], srcs = ["oid_tfrecord_creation_test.py"],
deps = [ deps = [
":oid_tfrecord_creation", ":oid_tfrecord_creation",
"//third_party/py/contextlib2", "//contextlib2",
"//third_party/py/pandas", "//pandas",
"//third_party/py/tensorflow", "//tensorflow",
], ],
) )
...@@ -99,9 +124,9 @@ py_binary( ...@@ -99,9 +124,9 @@ py_binary(
srcs = ["create_oid_tf_record.py"], srcs = ["create_oid_tf_record.py"],
deps = [ deps = [
":oid_tfrecord_creation", ":oid_tfrecord_creation",
"//third_party/py/contextlib2", "//contextlib2",
"//third_party/py/pandas", "//pandas",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import os
import numpy as np
import PIL.Image
from pycocotools import mask
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Command-line flags for the converter.
# NOTE(review): flags are defined via `tf.flags` while the alias below is
# `tf.app.flags` — presumably the same flags module in TF1; confirm.
flags = tf.app.flags
tf.flags.DEFINE_boolean('include_masks', False,
                        'Whether to include instance segmentations masks '
                        '(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '',
                       'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '',
                       'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '',
                       'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
                       'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
                       'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '',
                       'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

tf.logging.set_verbosity(tf.logging.INFO)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner. This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded image bytes.
    example: The converted tf.Example.
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  # Open (without binding the result) so PIL sniffs the header and raises on
  # unreadable image data.  Deliberately not assigned: the original bound this
  # to `image`, shadowing the metadata dict parameter above.
  PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes ...
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    # ... and boxes that extend beyond the image frame.
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    # Convert COCO [x, y, w, h] absolute coords to normalized corners.
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      # COCO polygons / RLE -> binary mask -> PNG bytes.
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Non-crowd polygon segmentations decode with one channel per
        # polygon; merge them into a single instance mask.
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    # Group annotations by image id so each image's records can be looked up
    # in O(1) while writing.
    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in annotations_index:
          annotations_index[image_id] = []
        annotations_index[image_id].append(annotation)
    # Images without annotations (e.g. test-dev) still get an (empty) entry so
    # every image is written.
    missing_annotation_count = 0
    for image in images:
      image_id = image['id']
      if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    tf.logging.info('writing to output path: %s', output_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      writer.write(tf_example.SerializeToString())
    writer.close()
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
def main(_):
  """Converts the train/val/test-dev COCO splits to TFRecord files."""
  # NOTE(review): `assert` is stripped under `python -O`; flag validation via
  # exceptions would be more robust, kept as-is here.
  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
  assert FLAGS.test_image_dir, '`test_image_dir` missing.'
  assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
  assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
  assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)
  train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
  testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')

  # One output record file per split.
  _create_tf_record_from_coco_annotations(
      FLAGS.train_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
      testdev_output_path,
      FLAGS.include_masks)


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_coco_tf_record.py."""
import io
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import create_coco_tf_record
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Tests for create_coco_tf_record.create_tf_example."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    # list() materializes the repeated field directly; the original used an
    # identity list comprehension.
    self.assertListEqual(list(proto_field), expectation)

  def test_create_tf_example(self):
    """A single valid annotation converts with normalized box corners."""
    image_file_name = 'tmp_image.jpg'
    # NOTE(review): float64 data handed to PIL.Image.fromarray with mode
    # 'RGB'; recent Pillow versions reject this — confirm against pinned PIL.
    image_data = np.random.rand(256, 256, 3)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
        2: {
            'name': 'cat',
            'id': 2
        },
        3: {
            'name': 'human',
            'id': 3
        }
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    # bbox [64, 64, 128, 128] on a 256x256 image -> corners at 0.25 / 0.75.
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])

  def test_create_tf_example_with_instance_masks(self):
    """include_masks=True emits one PNG-encoded mask per annotation."""
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.rand(8, 8, 3)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }

    # Two triangular polygons in opposite corners of the 8x8 image.
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])
    # Round-trip the emitted PNG masks and compare against the rasterized
    # union of the two polygons.
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
    ]
    pil_masks = [
        np.array(PIL.Image.open(encoded_mask_png))
        for encoded_mask_png in encoded_mask_pngs
    ]
    self.assertEqual(len(pil_masks), 1)
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])


if __name__ == '__main__':
  tf.test.main()
...@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use, ...@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
# Filter all bounding boxes of this frame that are of a legal class, and # Filter all bounding boxes of this frame that are of a legal class, and
# don't overlap with a dontcare region. # don't overlap with a dontcare region.
# TODO(talremez) filter out targets that are truncated or heavily occluded. # TODO filter out targets that are truncated or heavily occluded.
annotation_for_image = filter_annotations(img_anno, classes_to_use) annotation_for_image = filter_annotations(img_anno, classes_to_use)
example = prepare_example(image_path, annotation_for_image, label_map_dict) example = prepare_example(image_path, annotation_for_image, label_map_dict)
......
...@@ -24,7 +24,7 @@ import tensorflow as tf ...@@ -24,7 +24,7 @@ import tensorflow as tf
from object_detection.dataset_tools import create_kitti_tf_record from object_detection.dataset_tools import create_kitti_tf_record
class DictToTFExampleTest(tf.test.TestCase): class CreateKittiTFRecordTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation): def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value. """Helper function to assert if a proto field equals some value.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment