Unverified commit 31ae57eb, authored by pkulzc and committed by GitHub

Minor fixes for object detection (#5613)

* Internal change.

PiperOrigin-RevId: 213914693

* Add an original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image

PiperOrigin-RevId: 214018767

* Remove "groundtruth_confidences" from decoders use "groundtruth_weights" to indicate label confidence.

This also fixes a bug that only surfaced now: the random crop routines in core/preprocessor.py did not correctly handle the "groundtruth_weights" tensors returned by the decoders.

PiperOrigin-RevId: 214091843

* Update CocoMaskEvaluator to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 214295305

* Add the option to summarize gradients.

PiperOrigin-RevId: 214310875

* Adds FasterRCNN inference on CPU

1. Adds a flag use_static_shapes_for_eval to restrict to ops that guarantee static shapes.
2. Skips filtering of overlapping anchors while clipping anchors when use_static_shapes_for_eval is set to True.
3. A...
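For reference, point 2 corresponds to calling `box_list_ops.clip_to_window` with `filter_nonoverlapping=False`. A minimal hedged sketch with toy anchors (not this commit's code):

```python
import tensorflow as tf

from object_detection.core import box_list
from object_detection.core import box_list_ops

# With filter_nonoverlapping=False, anchors that fall outside the clip
# window are clipped to zero area rather than removed, so the number of
# anchors (and hence all downstream shapes) stays static.
anchors = box_list.BoxList(tf.constant([[-0.1, -0.1, 0.2, 0.2],
                                        [1.1, 1.1, 1.5, 1.5]]))
clip_window = tf.constant([0.0, 0.0, 1.0, 1.0])
clipped = box_list_ops.clip_to_window(anchors, clip_window,
                                      filter_nonoverlapping=False)
```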
parent 0b0c9cfd
......@@ -130,7 +130,8 @@ class TargetAssigner(object):
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
cls_weights: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
representing weights for each element in cls_targets.
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
......@@ -195,6 +196,15 @@ class TargetAssigner(object):
cls_weights = self._create_classification_weights(match,
groundtruth_weights)
# convert cls_weights from per-anchor to per-class.
class_label_shape = tf.shape(cls_targets)[1:]
weights_shape = tf.shape(cls_weights)
weights_multiple = tf.concat(
[tf.ones_like(weights_shape), class_label_shape],
axis=0)
for _ in range(len(cls_targets.get_shape()[1:])):
cls_weights = tf.expand_dims(cls_weights, -1)
cls_weights = tf.tile(cls_weights, weights_multiple)
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
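The new block above converts per-anchor weights to per-class weights. An illustrative sketch of the same tiling logic with assumed toy shapes:

```python
import tensorflow as tf

# A [num_anchors] weight vector is broadcast to the shape of cls_targets
# by appending singleton dimensions and tiling across the class dims.
cls_targets = tf.zeros([3, 7])               # [num_anchors, num_classes]
cls_weights = tf.constant([0.3, 0.0, 1.0])   # [num_anchors]

class_label_shape = tf.shape(cls_targets)[1:]
weights_multiple = tf.concat(
    [tf.ones_like(tf.shape(cls_weights)), class_label_shape], axis=0)
for _ in range(len(cls_targets.get_shape()[1:])):
  cls_weights = tf.expand_dims(cls_weights, -1)
cls_weights = tf.tile(cls_weights, weights_multiple)  # shape [3, 7]
```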
......@@ -445,7 +455,8 @@ def batch_assign_targets(target_assigner,
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
num_classes],
batch_cls_weights: a tensor with shape [batch_size, num_anchors],
batch_cls_weights: a tensor with shape [batch_size, num_anchors,
num_classes],
batch_reg_targets: a tensor with shape [batch_size, num_anchors,
box_code_dimension]
batch_reg_weights: a tensor with shape [batch_size, num_anchors],
......
......@@ -52,7 +52,7 @@ class TargetAssignerTest(test_case.TestCase):
[0.5, 0.5, 0.9, 0.9]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_cls_weights = [[1], [1], [1]]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
......@@ -96,7 +96,7 @@ class TargetAssignerTest(test_case.TestCase):
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]], dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0]
exp_cls_weights = [[1], [1], [0]]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
......@@ -143,7 +143,7 @@ class TargetAssignerTest(test_case.TestCase):
[[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_cls_weights = [[1], [1], [1]]
exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
-5],
[-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
......@@ -198,7 +198,7 @@ class TargetAssignerTest(test_case.TestCase):
[[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_cls_weights = [[1], [1], [1]]
exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
-5],
[-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
......@@ -254,7 +254,10 @@ class TargetAssignerTest(test_case.TestCase):
[0, 0, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_cls_weights = [[1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1]]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
......@@ -308,7 +311,11 @@ class TargetAssignerTest(test_case.TestCase):
[0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)
exp_cls_weights = [0.3, 0., 1, 0.5] # background class gets weight of 1.
# background class gets weight of 1.
exp_cls_weights = [[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3],
[0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 1],
[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]
exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0.
(cls_weights_out, reg_weights_out) = self.execute(graph_fn, [
......@@ -354,7 +361,11 @@ class TargetAssignerTest(test_case.TestCase):
[.5, 0, 0, .5, 0, 0, 0]],
dtype=np.float32)
exp_cls_weights = [1, 1, 1, 1] # background class gets weight of 1.
exp_cls_weights = [
[1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1]] # background class gets weight of 1.
exp_reg_weights = [.1, 1, 0., .5] # background class gets weight of 0.
(cls_weights_out, reg_weights_out) = self.execute(
......@@ -400,7 +411,10 @@ class TargetAssignerTest(test_case.TestCase):
[[1, 0], [0, 1]],
[[0, 0], [0, 0]],
[[0, 1], [1, .5]]]
exp_cls_weights = [1, 1, 1, 1]
exp_cls_weights = [[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]]]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
......@@ -449,7 +463,10 @@ class TargetAssignerTest(test_case.TestCase):
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_cls_weights = [[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
......@@ -555,6 +572,10 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
exp_cls_targets = [[[1], [0], [0], [0]],
[[0], [1], [1], [0]]]
exp_cls_weights = [[[1], [1], [1], [1]],
[[1], [1], [1], [1]]]
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
......@@ -563,10 +584,6 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[1], [0], [0], [0]],
[[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
......@@ -608,17 +625,6 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
......@@ -627,6 +633,22 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_cls_weights = [[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]]]
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
......@@ -678,16 +700,6 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
......@@ -696,6 +708,22 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_cls_weights = [[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]],
[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]]]
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
......@@ -748,16 +776,6 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 0.],
......@@ -774,6 +792,30 @@ class BatchTargetAssignerTest(test_case.TestCase):
[0., 0., 1.]],
[[0., 0., 0.],
[0., 0., 0.]]]]
exp_cls_weights = [[[[1., 1., 1.],
[1., 1., 1.]],
[[1., 1., 1.],
[1., 1., 1.]],
[[1., 1., 1.],
[1., 1., 1.]],
[[1., 1., 1.],
[1., 1., 1.]]],
[[[1., 1., 1.],
[1., 1., 1.]],
[[1., 1., 1.],
[1., 1., 1.]],
[[1., 1., 1.],
[1., 1., 1.]],
[[1., 1., 1.],
[1., 1., 1.]]]]
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
......@@ -807,11 +849,12 @@ class BatchTargetAssignerTest(test_case.TestCase):
groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]]
exp_cls_weights = [[[1, 1, 1, 1],
[1, 1, 1, 1]]]
exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_reg_weights = [[0, 0]]
num_classes = 3
pad = 1
......
item {
name: "face"
id: 1
display_name: "face"
}
......@@ -335,8 +335,6 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
shape [None] indicating the weights of groundtruth boxes.
fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
the number of groundtruth_boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing object mask area in pixels squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
......@@ -369,8 +367,6 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
tensor_dict[fields.InputDataFields.image])[:2]
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
if fields.InputDataFields.image_additional_channels in tensor_dict:
channels = tensor_dict[fields.InputDataFields.image_additional_channels]
......
......@@ -256,8 +256,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
@test_util.enable_c_shapes
def testDecodeKeypoint(self):
......@@ -305,8 +303,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
expected_keypoints = (
np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
......
......@@ -27,6 +27,7 @@ from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import shape_utils
from object_detection.utils import visualization_utils as vis_utils
slim = tf.contrib.slim
......@@ -321,6 +322,7 @@ def _run_checkpoint_once(tensor_dict,
# TODO(akuznetsa): result_dict contains batches of images, while
# add_single_ground_truth_image_info expects a single image. Fix
if (isinstance(result_dict, dict) and
fields.InputDataFields.key in result_dict and
result_dict[fields.InputDataFields.key]):
image_id = result_dict[fields.InputDataFields.key]
else:
......@@ -475,6 +477,35 @@ def repeated_checkpoint_run(tensor_dict,
return metrics
def _scale_box_to_absolute(args):
boxes, image_shape = args
return box_list_ops.to_absolute_coordinates(
box_list.BoxList(boxes), image_shape[0], image_shape[1]).get()
def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
def _resize_groundtruth_masks(args):
mask, image_shape = args
mask = tf.expand_dims(mask, 3)
mask = tf.image.resize_images(
mask,
image_shape,
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True)
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
def _scale_keypoint_to_absolute(args):
keypoints, image_shape = args
return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
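Each helper above operates on a single image; the batched result dict maps them over the batch. A hedged usage sketch with placeholder tensors:

```python
import tensorflow as tf

from object_detection.utils import shape_utils

# static_or_dynamic_map_fn unrolls into per-element ops when the leading
# dimension is statically known (keeping all shapes static); otherwise it
# falls back to tf.map_fn.
detection_boxes = tf.zeros([2, 5, 4], dtype=tf.float32)
image_shapes = tf.constant([[480, 640], [600, 800]], dtype=tf.int32)
absolute_boxes = shape_utils.static_or_dynamic_map_fn(
    _scale_box_to_absolute,
    elems=[detection_boxes, image_shapes],
    dtype=tf.float32)
```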
def result_dict_for_single_example(image,
key,
detections,
......@@ -533,89 +564,225 @@ def result_dict_for_single_example(image,
(Optional).
"""
if groundtruth:
max_gt_boxes = tf.shape(
groundtruth[fields.InputDataFields.groundtruth_boxes])[0]
for gt_key in groundtruth:
# expand groundtruth dict along the batch dimension.
groundtruth[gt_key] = tf.expand_dims(groundtruth[gt_key], 0)
for detection_key in detections:
detections[detection_key] = tf.expand_dims(
detections[detection_key][0], axis=0)
batched_output_dict = result_dict_for_batched_example(
image,
tf.expand_dims(key, 0),
detections,
groundtruth,
class_agnostic,
scale_to_absolute,
max_gt_boxes=max_gt_boxes)
exclude_keys = [
fields.InputDataFields.original_image,
fields.DetectionResultFields.num_detections,
fields.InputDataFields.num_groundtruth_boxes,
fields.InputDataFields.original_image_spatial_shape
]
output_dict = {
fields.InputDataFields.original_image:
batched_output_dict[fields.InputDataFields.original_image]
}
for key in batched_output_dict:
# remove the batch dimension.
if key not in exclude_keys:
output_dict[key] = tf.squeeze(batched_output_dict[key], 0)
return output_dict
def result_dict_for_batched_example(images,
keys,
detections,
groundtruth=None,
class_agnostic=False,
scale_to_absolute=False,
original_image_spatial_shapes=None,
max_gt_boxes=None):
"""Merges all detection and groundtruth information for a single example.
Note that evaluation tools require classes that are 1-indexed, and so this
function performs the offset. If `class_agnostic` is True, all output classes
have label 1.
Args:
images: A single 4D uint8 image tensor of shape [batch_size, H, W, C].
keys: A [batch_size] string tensor with image identifier.
detections: A dictionary of detections, returned from
DetectionModel.postprocess().
groundtruth: (Optional) Dictionary of groundtruth items, with fields:
'groundtruth_boxes': [batch_size, max_number_of_boxes, 4] float32 tensor
of boxes, in normalized coordinates.
'groundtruth_classes': [batch_size, max_number_of_boxes] int64 tensor of
1-indexed classes.
'groundtruth_area': [batch_size, max_number_of_boxes] float32 tensor of
bbox area. (Optional)
'groundtruth_is_crowd':[batch_size, max_number_of_boxes] int64
tensor. (Optional)
'groundtruth_difficult': [batch_size, max_number_of_boxes] int64
tensor. (Optional)
'groundtruth_group_of': [batch_size, max_number_of_boxes] int64
tensor. (Optional)
'groundtruth_instance_masks': 4D int64 tensor of instance
masks (Optional).
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
scaled to absolute coordinates. Note that for IoU based evaluations, it
does not matter whether boxes are expressed in absolute or relative
coordinates. Default False.
original_image_spatial_shapes: A 2D int32 tensor of shape [batch_size, 2]
used to resize the image. When set to None, the image size is retained.
max_gt_boxes: [batch_size] tensor representing the maximum number of
groundtruth boxes to pad.
Returns:
A dictionary with:
'original_image': A [batch_size, H, W, C] uint8 image tensor.
'original_image_spatial_shape': A [batch_size, 2] tensor containing the
original image sizes.
'key': A [batch_size] string tensor with image identifier.
'detection_boxes': [batch_size, max_detections, 4] float32 tensor of boxes,
in normalized or absolute coordinates, depending on the value of
`scale_to_absolute`.
'detection_scores': [batch_size, max_detections] float32 tensor of scores.
'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
classes.
'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
binarized masks, reframed to full image masks.
'num_detections': [batch_size] int64 tensor containing number of valid
detections.
'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
normalized or absolute coordinates, depending on the value of
`scale_to_absolute`. (Optional)
'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
classes. (Optional)
'groundtruth_area': [batch_size, num_boxes] float32 tensor of bbox
area. (Optional)
'groundtruth_is_crowd': [batch_size, num_boxes] int64 tensor. (Optional)
'groundtruth_difficult': [batch_size, num_boxes] int64 tensor. (Optional)
'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
'groundtruth_instance_masks': 4D int64 tensor of instance masks
(Optional).
'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
of groundtruth boxes per image.
Raises:
ValueError: if `original_image_spatial_shapes` is not a 2D int32 tensor of
shape [batch_size, 2].
"""
label_id_offset = 1 # Applying label id offset (b/63711816)
input_data_fields = fields.InputDataFields
if original_image_spatial_shapes is None:
original_image_spatial_shapes = tf.tile(
tf.expand_dims(tf.shape(images)[1:3], axis=0),
multiples=[tf.shape(images)[0], 1])
else:
if (len(original_image_spatial_shapes.shape) != 2 or
original_image_spatial_shapes.shape[1] != 2):
raise ValueError(
'`original_image_spatial_shape` should be a 2D tensor of shape '
'[batch_size, 2].')
output_dict = {
input_data_fields.original_image: image,
input_data_fields.key: key,
input_data_fields.original_image: images,
input_data_fields.key: keys,
input_data_fields.original_image_spatial_shape: (
original_image_spatial_shapes)
}
detection_fields = fields.DetectionResultFields
detection_boxes = detections[detection_fields.detection_boxes][0]
image_shape = tf.shape(image)
detection_scores = detections[detection_fields.detection_scores][0]
detection_boxes = detections[detection_fields.detection_boxes]
detection_scores = detections[detection_fields.detection_scores]
num_detections = tf.to_int32(detections[detection_fields.num_detections])
if class_agnostic:
detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
else:
detection_classes = (
tf.to_int64(detections[detection_fields.detection_classes][0]) +
tf.to_int64(detections[detection_fields.detection_classes]) +
label_id_offset)
num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
detection_boxes = tf.slice(
detection_boxes, begin=[0, 0], size=[num_detections, -1])
detection_classes = tf.slice(
detection_classes, begin=[0], size=[num_detections])
detection_scores = tf.slice(
detection_scores, begin=[0], size=[num_detections])
if scale_to_absolute:
absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
output_dict[detection_fields.detection_boxes] = (
absolute_detection_boxlist.get())
shape_utils.static_or_dynamic_map_fn(
_scale_box_to_absolute,
elems=[detection_boxes, original_image_spatial_shapes],
dtype=tf.float32))
else:
output_dict[detection_fields.detection_boxes] = detection_boxes
output_dict[detection_fields.detection_classes] = detection_classes
output_dict[detection_fields.detection_scores] = detection_scores
output_dict[detection_fields.num_detections] = num_detections
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks][0]
detection_masks = detections[detection_fields.detection_masks]
# TODO(rathodv): This should be done in model's postprocess
# function ideally.
detection_masks = tf.slice(
detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[1], image_shape[2])
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
output_dict[detection_fields.detection_masks] = detection_masks_reframed
output_dict[detection_fields.detection_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks,
original_image_spatial_shapes],
dtype=tf.uint8))
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints][0]
detection_keypoints = detections[detection_fields.detection_keypoints]
output_dict[detection_fields.detection_keypoints] = detection_keypoints
if scale_to_absolute:
absolute_detection_keypoints = keypoint_ops.scale(
detection_keypoints, image_shape[1], image_shape[2])
output_dict[detection_fields.detection_keypoints] = (
absolute_detection_keypoints)
shape_utils.static_or_dynamic_map_fn(
_scale_keypoint_to_absolute,
elems=[detection_keypoints, original_image_spatial_shapes],
dtype=tf.float32))
if groundtruth:
if max_gt_boxes is None:
if input_data_fields.num_groundtruth_boxes in groundtruth:
max_gt_boxes = groundtruth[input_data_fields.num_groundtruth_boxes]
else:
raise ValueError(
'max_gt_boxes must be provided when processing batched examples.')
if input_data_fields.groundtruth_instance_masks in groundtruth:
masks = groundtruth[input_data_fields.groundtruth_instance_masks]
masks = tf.expand_dims(masks, 3)
masks = tf.image.resize_images(
masks,
image_shape[1:3],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True)
masks = tf.squeeze(masks, 3)
groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
masks, tf.uint8)
groundtruth[input_data_fields.groundtruth_instance_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_groundtruth_masks,
elems=[masks, original_image_spatial_shapes],
dtype=tf.uint8))
output_dict.update(groundtruth)
if scale_to_absolute:
groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
output_dict[input_data_fields.groundtruth_boxes] = (
absolute_gt_boxlist.get())
shape_utils.static_or_dynamic_map_fn(
_scale_box_to_absolute,
elems=[groundtruth_boxes, original_image_spatial_shapes],
dtype=tf.float32))
# For class-agnostic models, groundtruth classes all become 1.
if class_agnostic:
groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes
output_dict[input_data_fields.num_groundtruth_boxes] = max_gt_boxes
return output_dict
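A hedged usage sketch of the batched form (placeholder tensors; no groundtruth is passed, so `max_gt_boxes` is not needed):

```python
import tensorflow as tf

from object_detection import eval_util
from object_detection.core import standard_fields as fields

detection_fields = fields.DetectionResultFields
images = tf.zeros([2, 20, 20, 3], dtype=tf.uint8)
keys = tf.constant(['image1', 'image2'])
detections = {
    detection_fields.detection_boxes: tf.zeros([2, 5, 4], tf.float32),
    detection_fields.detection_scores: tf.zeros([2, 5], tf.float32),
    detection_fields.detection_classes: tf.zeros([2, 5], tf.float32),
    detection_fields.num_detections: tf.constant([5, 5]),
}
# Boxes are scaled to absolute coordinates using each image's own spatial
# shape (derived from `images` when not passed explicitly).
result_dict = eval_util.result_dict_for_batched_example(
    images, keys, detections, scale_to_absolute=True)
```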
......
......@@ -18,37 +18,58 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from absl.testing import parameterized
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import standard_fields as fields
from object_detection.protos import eval_pb2
from object_detection.utils import test_case
class EvalUtilTest(tf.test.TestCase):
class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
def _get_categories_list(self):
return [{'id': 0, 'name': 'person'},
{'id': 1, 'name': 'dog'},
{'id': 2, 'name': 'cat'}]
def _make_evaluation_dict(self, resized_groundtruth_masks=False):
def _make_evaluation_dict(self,
resized_groundtruth_masks=False,
batch_size=1,
max_gt_boxes=None,
scale_to_absolute=False):
input_data_fields = fields.InputDataFields
detection_fields = fields.DetectionResultFields
image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
key = tf.constant('image1')
detection_boxes = tf.constant([[[0., 0., 1., 1.]]])
detection_scores = tf.constant([[0.8]])
detection_classes = tf.constant([[0]])
detection_masks = tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32)
num_detections = tf.constant([1])
image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
if batch_size == 1:
key = tf.constant('image1')
else:
key = tf.constant([str(range(batch_size))])
detection_boxes = tf.tile(tf.constant([[[0., 0., 1., 1.]]]),
multiples=[batch_size, 1, 1])
detection_scores = tf.tile(tf.constant([[0.8]]), multiples=[batch_size, 1])
detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
detection_masks = tf.tile(tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32),
multiples=[batch_size, 1, 1, 1])
num_detections = tf.ones([batch_size])
groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
groundtruth_classes = tf.constant([1])
groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
if resized_groundtruth_masks:
groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
if batch_size > 1:
groundtruth_boxes = tf.tile(tf.expand_dims(groundtruth_boxes, 0),
multiples=[batch_size, 1, 1])
groundtruth_classes = tf.tile(tf.expand_dims(groundtruth_classes, 0),
multiples=[batch_size, 1])
groundtruth_instance_masks = tf.tile(
tf.expand_dims(groundtruth_instance_masks, 0),
multiples=[batch_size, 1, 1, 1])
detections = {
detection_fields.detection_boxes: detection_boxes,
detection_fields.detection_scores: detection_scores,
......@@ -61,14 +82,31 @@ class EvalUtilTest(tf.test.TestCase):
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
}
return eval_util.result_dict_for_single_example(image, key, detections,
groundtruth)
def test_get_eval_metric_ops_for_coco_detections(self):
if batch_size > 1:
return eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=scale_to_absolute,
max_gt_boxes=max_gt_boxes)
else:
return eval_util.result_dict_for_single_example(
image, key, detections, groundtruth,
scale_to_absolute=scale_to_absolute)
@parameterized.parameters(
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
max_gt_boxes=None,
scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(['coco_detection_metrics'])
categories = self._get_categories_list()
eval_dict = self._make_evaluation_dict()
eval_dict = self._make_evaluation_dict(batch_size=batch_size,
max_gt_boxes=max_gt_boxes,
scale_to_absolute=scale_to_absolute)
metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_config, categories, eval_dict)
_, update_op = metric_ops['DetectionBoxes_Precision/mAP']
......@@ -79,16 +117,24 @@ class EvalUtilTest(tf.test.TestCase):
metrics[key] = value_op
sess.run(update_op)
metrics = sess.run(metrics)
print(metrics)
self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
self.assertNotIn('DetectionMasks_Precision/mAP', metrics)
def test_get_eval_metric_ops_for_coco_detections_and_masks(self):
@parameterized.parameters(
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
def test_get_eval_metric_ops_for_coco_detections_and_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(
['coco_detection_metrics', 'coco_mask_metrics'])
categories = self._get_categories_list()
eval_dict = self._make_evaluation_dict()
eval_dict = self._make_evaluation_dict(batch_size=batch_size,
max_gt_boxes=max_gt_boxes,
scale_to_absolute=scale_to_absolute)
metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_config, categories, eval_dict)
_, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
......@@ -104,12 +150,22 @@ class EvalUtilTest(tf.test.TestCase):
self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])
def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(self):
@parameterized.parameters(
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(
['coco_detection_metrics', 'coco_mask_metrics'])
categories = self._get_categories_list()
eval_dict = self._make_evaluation_dict(resized_groundtruth_masks=True)
eval_dict = self._make_evaluation_dict(batch_size=batch_size,
max_gt_boxes=max_gt_boxes,
scale_to_absolute=scale_to_absolute,
resized_groundtruth_masks=True)
metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_config, categories, eval_dict)
_, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
......
......@@ -234,12 +234,16 @@ def export_tflite_graph(pipeline_config, trained_checkpoint_prefix, output_dir,
tf.train.get_or_create_global_step()
# graph rewriter
if pipeline_config.HasField('graph_rewriter'):
is_quantized = pipeline_config.HasField('graph_rewriter')
if is_quantized:
graph_rewriter_config = pipeline_config.graph_rewriter
graph_rewriter_fn = graph_rewriter_builder.build(
graph_rewriter_config, is_training=False)
graph_rewriter_fn()
if pipeline_config.model.ssd.feature_extractor.HasField('fpn'):
exporter.rewrite_nn_resize_op(is_quantized)
# freeze the graph
saver_kwargs = {}
if pipeline_config.eval_config.use_moving_averages:
......
......@@ -23,6 +23,7 @@ import six
import tensorflow as tf
from tensorflow.core.framework import types_pb2
from object_detection import export_tflite_ssd_graph_lib
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.core import model
......@@ -70,6 +71,12 @@ class FakeModel(model.DetectionModel):
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
class ExportTfliteGraphTest(tf.test.TestCase):
......@@ -335,6 +342,28 @@ class ExportTfliteGraphTest(tf.test.TestCase):
for t in node.attr['_output_types'].list.type
]))
@mock.patch.object(exporter, 'rewrite_nn_resize_op')
def test_export_with_nn_resize_op_not_called_without_fpn(self, mock_get):
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
tflite_graph_file = self._export_graph_with_postprocessing_op(
pipeline_config)
self.assertTrue(os.path.exists(tflite_graph_file))
mock_get.assert_not_called()
@mock.patch.object(exporter, 'rewrite_nn_resize_op')
def test_export_with_nn_resize_op_called_with_fpn(self, mock_get):
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10
pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10
pipeline_config.model.ssd.feature_extractor.fpn.min_level = 3
pipeline_config.model.ssd.feature_extractor.fpn.max_level = 7
tflite_graph_file = self._export_graph_with_postprocessing_op(
pipeline_config)
self.assertTrue(os.path.exists(tflite_graph_file))
mock_get.assert_called_once()
if __name__ == '__main__':
tf.test.main()
......@@ -17,6 +17,7 @@
import os
import tempfile
import tensorflow as tf
from tensorflow.contrib.quantize.python import graph_matcher
from tensorflow.core.protobuf import saver_pb2
from tensorflow.python.client import session
from tensorflow.python.platform import gfile
......@@ -28,12 +29,58 @@ from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.utils import config_util
from object_detection.utils import shape_utils
slim = tf.contrib.slim
freeze_graph_with_def_protos = freeze_graph.freeze_graph_with_def_protos
def rewrite_nn_resize_op(is_quantized=False):
"""Replaces a custom nearest-neighbor resize op with the Tensorflow version.
Some graphs use this custom version for TPU-compatibility.
Args:
is_quantized: True if the default graph is quantized.
"""
input_pattern = graph_matcher.OpTypePattern(
'FakeQuantWithMinMaxVars' if is_quantized else '*')
reshape_1_pattern = graph_matcher.OpTypePattern(
'Reshape', inputs=[input_pattern, 'Const'], ordered_inputs=False)
mul_pattern = graph_matcher.OpTypePattern(
'Mul', inputs=[reshape_1_pattern, 'Const'], ordered_inputs=False)
# The quantization script may or may not insert a fake quant op after the
# Mul. In either case, these min/max vars are not needed once replaced with
# the TF version of NN resize.
fake_quant_pattern = graph_matcher.OpTypePattern(
'FakeQuantWithMinMaxVars',
inputs=[mul_pattern, 'Identity', 'Identity'],
ordered_inputs=False)
reshape_2_pattern = graph_matcher.OpTypePattern(
'Reshape',
inputs=[graph_matcher.OneofPattern([fake_quant_pattern, mul_pattern]),
'Const'],
ordered_inputs=False)
add_pattern = graph_matcher.OpTypePattern(
'Add', inputs=[reshape_2_pattern, '*'], ordered_inputs=False)
matcher = graph_matcher.GraphMatcher(add_pattern)
for match in matcher.match_graph(tf.get_default_graph()):
projection_op = match.get_op(input_pattern)
reshape_2_op = match.get_op(reshape_2_pattern)
add_op = match.get_op(add_pattern)
nn_resize = tf.image.resize_nearest_neighbor(
projection_op.outputs[0],
add_op.outputs[0].shape.dims[1:3],
align_corners=False)
for index, op_input in enumerate(add_op.inputs):
if op_input == reshape_2_op.outputs[0]:
add_op._update_input(index, nn_resize) # pylint: disable=protected-access
break
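The Reshape→Mul→Reshape pattern the matcher targets is the TPU-friendly upsampling used by `ops.nearest_neighbor_upsampling`. A sketch of that pattern, assuming static shapes, for illustration only:

```python
import tensorflow as tf

def nearest_neighbor_upsampling_sketch(x, scale=2):
  # Insert singleton axes, multiply by a ones kernel to replicate each
  # pixel scale*scale times, then collapse back to an upsampled map.
  batch, height, width, channels = x.shape.as_list()
  x = tf.reshape(x, [batch, height, 1, width, 1, channels])
  x = x * tf.ones([1, 1, scale, 1, scale, 1], dtype=x.dtype)
  return tf.reshape(x, [batch, height * scale, width * scale, channels])
```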
def replace_variable_values_with_moving_averages(graph,
current_checkpoint_file,
new_checkpoint_file):
......@@ -82,11 +129,12 @@ def _tf_example_input_placeholder():
image_tensor = tensor_dict[fields.InputDataFields.image]
return image_tensor
return (batch_tf_example_placeholder,
tf.map_fn(decode,
elems=batch_tf_example_placeholder,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False))
shape_utils.static_or_dynamic_map_fn(
decode,
elems=batch_tf_example_placeholder,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False))
def _encoded_image_string_tensor_input_placeholder():
......@@ -121,8 +169,8 @@ input_placeholder_fn_map = {
}
def _add_output_tensor_nodes(postprocessed_tensors,
output_collection_name='inference_op'):
def add_output_tensor_nodes(postprocessed_tensors,
output_collection_name='inference_op'):
"""Adds output nodes for detection boxes and scores.
Adds the following nodes for output tensors -
......@@ -254,8 +302,8 @@ def _get_outputs_from_inputs(input_tensors, detection_model,
preprocessed_inputs, true_image_shapes)
postprocessed_tensors = detection_model.postprocess(
output_tensors, true_image_shapes)
return _add_output_tensor_nodes(postprocessed_tensors,
output_collection_name)
return add_output_tensor_nodes(postprocessed_tensors,
output_collection_name)
def _build_detection_graph(input_type, detection_model, input_shape,
......
......@@ -19,12 +19,15 @@ import numpy as np
import six
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import graph_rewriter_pb2
from object_detection.protos import pipeline_pb2
from object_detection.utils import ops
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
......@@ -74,6 +77,12 @@ class FakeModel(model.DetectionModel):
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
class ExportInferenceGraphTest(tf.test.TestCase):
......@@ -928,6 +937,52 @@ class ExportInferenceGraphTest(tf.test.TestCase):
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def test_rewrite_nn_resize_op(self):
g = tf.Graph()
with g.as_default():
x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8))
y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8))
s = ops.nearest_neighbor_upsampling(x, 2)
t = s + y
exporter.rewrite_nn_resize_op()
resize_op_found = False
for op in g.get_operations():
if op.type == 'ResizeNearestNeighbor':
resize_op_found = True
self.assertEqual(op.inputs[0], x)
self.assertEqual(op.outputs[0].consumers()[0], t.op)
break
self.assertTrue(resize_op_found)
def test_rewrite_nn_resize_op_quantized(self):
g = tf.Graph()
with g.as_default():
x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8))
x_conv = tf.contrib.slim.conv2d(x, 8, 1)
y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8))
s = ops.nearest_neighbor_upsampling(x_conv, 2)
t = s + y
graph_rewriter_config = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_config.quantization.delay = 500000
graph_rewriter_fn = graph_rewriter_builder.build(
graph_rewriter_config, is_training=False)
graph_rewriter_fn()
exporter.rewrite_nn_resize_op(is_quantized=True)
resize_op_found = False
for op in g.get_operations():
if op.type == 'ResizeNearestNeighbor':
resize_op_found = True
self.assertEqual(op.inputs[0].op.type, 'FakeQuantWithMinMaxVars')
self.assertEqual(op.outputs[0].consumers()[0], t.op)
break
self.assertTrue(resize_op_found)
if __name__ == '__main__':
tf.test.main()
......@@ -78,6 +78,7 @@ Some remarks on frozen inference graphs:
| [ssd_mobilenet_v1_fpn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz) | 56 | 32 | Boxes |
| [ssd_resnet_50_fpn_coco ☆](http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz) | 76 | 35 | Boxes |
| [ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes |
| [ssd_mobilenet_v2_quantized_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_quantized_300x300_coco_2018_09_14.tar.gz) | 29 | 22 | Boxes |
| [ssdlite_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz) | 27 | 22 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz) | 42 | 24 | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
......@@ -111,6 +112,7 @@ Model name
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
[facessd_mobilenet_v2_quantized_open_image_v4](http://download.tensorflow.org/models/object_detection/facessd_mobilenet_v2_quantized_320x320_open_image_v4.tar.gz) [^3] | 20 | 73 (faces) | Boxes
## iNaturalist Species-trained models
......@@ -130,4 +132,5 @@ Model name
[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
[^2]: This is PASCAL mAP with a slightly different way of true positives computation: see [Open Images evaluation protocol](evaluation_protocols.md#open-images).
[^3]: Non-face boxes are dropped during training and non-face groundtruth boxes are ignored when evaluating.
......@@ -108,7 +108,7 @@ Run the compilation process again, but use the downloaded version of protoc
**If you are on macOS:**
If you have homebrew, download and install the protobuf with
```brew install protobuf```
Alternately, run:
......@@ -118,7 +118,7 @@ sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc
rm -f $PROTOC_ZIP
```
Run the compilation process again:
``` bash
# From tensorflow/models/research/
......
......@@ -124,6 +124,8 @@ def transform_input_data(tensor_dict,
if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
_, resized_masks, _ = image_resizer_fn(image, masks)
if use_bfloat16:
resized_masks = tf.cast(resized_masks, tf.bfloat16)
tensor_dict[fields.InputDataFields.
groundtruth_instance_masks] = resized_masks
......@@ -161,6 +163,9 @@ def transform_input_data(tensor_dict,
tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
merged_confidences)
if fields.InputDataFields.groundtruth_boxes in tensor_dict:
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
return tensor_dict
......@@ -282,12 +287,9 @@ def augment_input_data(tensor_dict, data_augmentation_options):
in tensor_dict)
include_keypoints = (fields.InputDataFields.groundtruth_keypoints
in tensor_dict)
include_label_scores = (fields.InputDataFields.groundtruth_confidences in
tensor_dict)
tensor_dict = preprocessor.preprocess(
tensor_dict, data_augmentation_options,
func_arg_map=preprocessor.get_default_func_arg_map(
include_label_scores=include_label_scores,
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
......
......@@ -630,6 +630,9 @@ class DataTransformationFnTest(test_case.TestCase):
self.assertAllClose(
transformed_inputs[fields.InputDataFields.groundtruth_confidences],
[[1, 0, 1]])
self.assertAllClose(
transformed_inputs[fields.InputDataFields.num_groundtruth_boxes],
1)
def test_returns_resized_masks(self):
tensor_dict = {
......
......@@ -160,6 +160,17 @@ class FakeDetectionModel(model.DetectionModel):
}
return loss_dict
def regularization_losses(self):
"""Returns a list of regularization losses for this model.
Returns a list of regularization losses for this model that the estimator
needs to use during training/optimization.
Returns:
A list of regularization loss tensors.
"""
pass
def restore_map(self, fine_tune_checkpoint_type='detection'):
"""Returns a map of variables to load from a foreign checkpoint.
......@@ -174,6 +185,18 @@ class FakeDetectionModel(model.DetectionModel):
"""
return {var.op.name: var for var in tf.global_variables()}
def updates(self):
"""Returns a list of update operators for this model.
Returns a list of update operators for this model that must be executed at
each training step. The estimator's train op needs to have a control
dependency on these updates.
Returns:
A list of update operators.
"""
pass
class TrainerTest(tf.test.TestCase):
......
......@@ -662,7 +662,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
anchors_boxlist, clip_window)
else:
anchors_boxlist = box_list_ops.clip_to_window(
anchors_boxlist, clip_window)
anchors_boxlist, clip_window,
filter_nonoverlapping=not self._use_static_shapes)
self._anchors = anchors_boxlist
prediction_dict = {
......@@ -917,12 +918,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
_, num_classes, mask_height, mask_width = (
detection_masks.get_shape().as_list())
_, max_detection = detection_classes.get_shape().as_list()
prediction_dict['mask_predictions'] = tf.reshape(
detection_masks, [-1, num_classes, mask_height, mask_width])
if num_classes > 1:
detection_masks = self._gather_instance_masks(
detection_masks, detection_classes)
prediction_dict[fields.DetectionResultFields.detection_masks] = (
tf.reshape(detection_masks,
tf.reshape(tf.sigmoid(detection_masks),
[batch_size, max_detection, mask_height, mask_width]))
return prediction_dict
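Since postprocess now applies a sigmoid, `detection_masks` holds probabilities rather than logits. A hedged downstream sketch of binarizing them (0.5 threshold, as eval_util does above; toy values):

```python
import tensorflow as tf

mask_probs = tf.sigmoid(tf.random_normal([2, 5, 14, 14]))  # toy logits
binary_masks = tf.cast(tf.greater(mask_probs, 0.5), tf.uint8)
```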
......@@ -1159,9 +1162,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
}
# TODO(jrru): Remove mask_predictions from _post_process_box_classifier.
with tf.name_scope('SecondStagePostprocessor'):
if (self._number_of_stages == 2 or
(self._number_of_stages == 3 and self._is_training)):
if (self._number_of_stages == 2 or
(self._number_of_stages == 3 and self._is_training)):
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
......@@ -1170,18 +1173,53 @@ class FasterRCNNMetaArch(model.DetectionModel):
prediction_dict['num_proposals'],
true_image_shapes,
mask_predictions=mask_predictions)
return detections_dict
if 'rpn_features_to_crop' in prediction_dict and self._initial_crop_size:
self._add_detection_features_output_node(
detections_dict[fields.DetectionResultFields.detection_boxes],
prediction_dict['rpn_features_to_crop'])
return detections_dict
if self._number_of_stages == 3:
# Post processing is already performed in 3rd stage. We need to transfer
# postprocessed tensors from `prediction_dict` to `detections_dict`.
detections_dict = {}
for key in prediction_dict:
if key == fields.DetectionResultFields.detection_masks:
detections_dict[key] = tf.sigmoid(prediction_dict[key])
elif 'detection' in key:
detections_dict[key] = prediction_dict[key]
return detections_dict
return prediction_dict
def _add_detection_features_output_node(self, detection_boxes,
rpn_features_to_crop):
"""Add the detection features to the output node.
The detection features come from cropping rpn_features_to_crop with the
detection boxes. Each bounding box has one feature vector of length depth,
obtained by mean-pooling the cropped features.
Args:
detection_boxes: a 3-D float32 tensor of shape
[batch_size, max_detection, 4] which represents the bounding boxes.
rpn_features_to_crop: A 4-D float32 tensor with shape
[batch, height, width, depth] representing image features to crop using
the proposals boxes.
"""
with tf.name_scope('SecondStageDetectionFeaturesExtract'):
flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes))
detection_features_unpooled = (
self._feature_extractor.extract_box_classifier_features(
flattened_detected_feature_maps,
scope=self.second_stage_feature_extractor_scope))
batch_size = tf.shape(detection_boxes)[0]
max_detection = tf.shape(detection_boxes)[1]
detection_features_pool = tf.reduce_mean(
detection_features_unpooled, axis=[1, 2])
detection_features = tf.reshape(
detection_features_pool,
[batch_size, max_detection, tf.shape(detection_features_pool)[-1]])
detection_features = tf.identity(
detection_features, 'detection_features')
def _postprocess_rpn(self,
rpn_box_encodings_batch,
......@@ -1454,6 +1492,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
# to cls_weights. This could happen as boxes within certain IOU ranges
# are ignored. If triggered, the selected boxes will still be ignored
# during loss computation.
cls_weights = tf.reduce_mean(cls_weights, axis=-1)
positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0)
valid_indicator = tf.logical_and(
tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
......@@ -1566,6 +1605,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
mask_predictions_batch = tf.reshape(
mask_predictions, [-1, self.max_num_proposals,
self.num_classes, mask_height, mask_width])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _,
num_detections) = self._second_stage_nms_fn(
refined_decoded_boxes_batch,
......@@ -1713,6 +1753,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
gt_box_batch=groundtruth_boxlists,
gt_class_targets_batch=(len(groundtruth_boxlists) * [None]),
gt_weights_batch=groundtruth_weights_list)
batch_cls_weights = tf.reduce_mean(batch_cls_weights, axis=2)
batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)
def _minibatch_subsample_fn(inputs):
......@@ -1743,7 +1784,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
losses_mask=losses_mask)
objectness_losses = self._first_stage_objectness_loss(
rpn_objectness_predictions_with_background,
batch_one_hot_targets, weights=batch_sampled_indices,
batch_one_hot_targets,
weights=tf.expand_dims(batch_sampled_indices, axis=-1),
losses_mask=losses_mask)
localization_loss = tf.reduce_mean(
tf.reduce_sum(localization_losses, axis=1) / normalizer)
......@@ -1960,25 +2002,28 @@ class FasterRCNNMetaArch(model.DetectionModel):
tf.expand_dims(flat_gt_masks, -1),
tf.expand_dims(flat_normalized_proposals, axis=1),
[mask_height, mask_width])
# Without stopping gradients into the cropped groundtruth masks,
# performance with 100-padded groundtruth masks is about 4% worse when
# batch size > 1.
# TODO(rathodv): Investigate this since we don't expect any variables
# upstream of flat_cropped_gt_mask.
flat_cropped_gt_mask = tf.stop_gradient(flat_cropped_gt_mask)
batch_cropped_gt_mask = tf.reshape(
flat_cropped_gt_mask,
[batch_size, -1, mask_height * mask_width])
second_stage_mask_losses = ops.reduce_sum_trailing_dimensions(
self._second_stage_mask_loss(
reshaped_prediction_masks,
batch_cropped_gt_mask,
weights=batch_mask_target_weights,
losses_mask=losses_mask),
ndims=2) / (
mask_height * mask_width * tf.maximum(
tf.reduce_sum(
batch_mask_target_weights, axis=1, keep_dims=True
), tf.ones((batch_size, 1))))
second_stage_mask_loss = tf.reduce_sum(
tf.where(paddings_indicator, second_stage_mask_losses,
tf.zeros_like(second_stage_mask_losses)))
mask_losses_weights = (
batch_mask_target_weights * tf.to_float(paddings_indicator))
mask_losses = self._second_stage_mask_loss(
reshaped_prediction_masks,
batch_cropped_gt_mask,
weights=tf.expand_dims(mask_losses_weights, axis=-1),
losses_mask=losses_mask)
total_mask_loss = tf.reduce_sum(mask_losses)
normalizer = tf.maximum(
tf.reduce_sum(mask_losses_weights * mask_height * mask_width), 1.0)
second_stage_mask_loss = total_mask_loss / normalizer
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
......@@ -2073,6 +2118,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
cls_losses=tf.expand_dims(single_image_cls_loss, 0),
decoded_boxlist_list=[proposal_boxlist])
def regularization_losses(self):
"""Returns a list of regularization losses for this model.
Returns a list of regularization losses for this model that the estimator
needs to use during training/optimization.
Returns:
A list of regularization loss tensors.
"""
return tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
def restore_map(self,
fine_tune_checkpoint_type='detection',
load_all_detection_checkpoint_vars=False):
......@@ -2117,3 +2173,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore, include_patterns=include_patterns)
return {var.op.name: var for var in feature_extractor_variables}
def updates(self):
"""Returns a list of update operators for this model.
Returns a list of update operators for this model that must be executed at
each training step. The estimator's train op needs to have a control
dependency on these updates.
Returns:
A list of update operators.
"""
return tf.get_collection(tf.GraphKeys.UPDATE_OPS)
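An assumed consumer-side sketch (not part of this commit) of how a training loop might use the new `regularization_losses()` and `updates()` hooks; `model`, `task_loss`, and `optimizer` are stand-ins:

```python
import tensorflow as tf

def build_train_op(model, task_loss, optimizer):
  # Fold regularization losses into the total loss, and give the train op
  # a control dependency on the model's update ops (e.g. batch norm).
  reg_losses = model.regularization_losses() or []
  total_loss = tf.add_n([task_loss] + list(reg_losses))
  with tf.control_dependencies(model.updates() or []):
    return optimizer.minimize(total_loss)
```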
......@@ -189,7 +189,7 @@ class FasterRCNNMetaArchTest(
set(expected_shapes.keys()).union(
set([
'detection_boxes', 'detection_scores', 'detection_classes',
'detection_masks', 'num_detections'
'detection_masks', 'num_detections', 'mask_predictions',
])))
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
......@@ -199,6 +199,9 @@ class FasterRCNNMetaArchTest(
self.assertAllEqual(tensor_dict_out['detection_classes'].shape, [2, 5])
self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5])
self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2])
num_classes = 1 if masks_are_class_agnostic else 2
self.assertAllEqual(tensor_dict_out['mask_predictions'].shape,
[10, num_classes, 14, 14])
@parameterized.parameters(
{'masks_are_class_agnostic': False},
......