Unverified Commit 9bbf8015 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Merged commit includes the following changes: (#6932)

250447559  by Zhichao Lu:

    Update expected files format for Instance Segmentation challenge:
    - add fields ImageWidth, ImageHeight and store the values per prediction
    - as mask, store only encoded image and assume its size is ImageWidth x ImageHeight

--
250402780  by rathodv:

    Fix failing Mask R-CNN TPU convergence test.

    Cast second stage prediction tensors from bfloat16 to float32 to prevent errors in third target assignment (Mask Prediction) - Concat with different types bfloat16 and float32 isn't allowed.

--
250300240  by Zhichao Lu:

    Adding Open Images Challenge 2019 object detection and instance segmentation
    support into the Estimator framework.

--
249944839  by rathodv:

    Modify exporter.py to add multiclass score nodes in exported inference graphs.

--
249935201  by rathodv:

    Modify postprocess methods to preserve multiclass scores after non max suppression.

--
249878079  by Zhich...
parent f42fddee
...@@ -271,7 +271,8 @@ class FasterRCNNMetaArchTest( ...@@ -271,7 +271,8 @@ class FasterRCNNMetaArchTest(
set(tensor_dict_out.keys()), set(tensor_dict_out.keys()),
set(expected_shapes.keys()).union( set(expected_shapes.keys()).union(
set([ set([
'detection_boxes', 'detection_scores', 'detection_classes', 'detection_boxes', 'detection_scores',
'detection_multiclass_scores', 'detection_classes',
'detection_masks', 'num_detections', 'mask_predictions', 'detection_masks', 'num_detections', 'mask_predictions',
'raw_detection_boxes', 'raw_detection_scores' 'raw_detection_boxes', 'raw_detection_scores'
]))) ])))
......
...@@ -967,7 +967,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -967,7 +967,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
[[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]] [[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]]
expected_proposal_scores = [[1, 1], expected_proposal_scores = [[1, 1],
[1, 1]] [1, 1]]
expected_num_proposals = [2, 2] expected_proposal_multiclass_scores = [[[0., 1.], [0., 1.]],
[[0., 1.], [0., 1.]]]
expected_raw_proposal_boxes = [[[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.], expected_raw_proposal_boxes = [[[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.],
[0.5, 0., 1., 0.5], [0.5, 0.5, 1., 1.]], [0.5, 0., 1., 0.5], [0.5, 0.5, 1., 1.]],
[[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.], [[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.],
...@@ -975,31 +976,45 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -975,31 +976,45 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
expected_raw_scores = [[[0., 1.], [0., 1.], [0., 1.], [0., 1.]], expected_raw_scores = [[[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
[[0., 1.], [0., 1.], [0., 1.], [0., 1.]]] [[0., 1.], [0., 1.], [0., 1.], [0., 1.]]]
expected_output_keys = set([ expected_output_keys = set([
'detection_boxes', 'detection_scores', 'num_detections', 'detection_boxes', 'detection_scores', 'detection_multiclass_scores',
'raw_detection_boxes', 'raw_detection_scores' 'num_detections', 'raw_detection_boxes', 'raw_detection_scores'
]) ])
self.assertEqual(set(proposals.keys()), expected_output_keys) self.assertEqual(set(proposals.keys()), expected_output_keys)
with self.test_session() as sess: with self.test_session() as sess:
proposals_out = sess.run(proposals) proposals_out = sess.run(proposals)
for image_idx in range(batch_size): for image_idx in range(batch_size):
num_detections = int(proposals_out['num_detections'][image_idx])
boxes = proposals_out['detection_boxes'][
image_idx][:num_detections, :].tolist()
scores = proposals_out['detection_scores'][
image_idx][:num_detections].tolist()
multiclass_scores = proposals_out['detection_multiclass_scores'][
image_idx][:num_detections, :].tolist()
expected_boxes = expected_proposal_boxes[image_idx]
expected_scores = expected_proposal_scores[image_idx]
expected_multiclass_scores = expected_proposal_multiclass_scores[
image_idx]
self.assertTrue( self.assertTrue(
test_utils.first_rows_close_as_set( test_utils.first_rows_close_as_set(boxes, expected_boxes))
proposals_out['detection_boxes'][image_idx].tolist(), self.assertTrue(
expected_proposal_boxes[image_idx])) test_utils.first_rows_close_as_set(scores, expected_scores))
self.assertAllClose(proposals_out['detection_scores'], self.assertTrue(
expected_proposal_scores) test_utils.first_rows_close_as_set(multiclass_scores,
self.assertAllEqual(proposals_out['num_detections'], expected_multiclass_scores))
expected_num_proposals)
self.assertAllClose(proposals_out['raw_detection_boxes'], self.assertAllClose(proposals_out['raw_detection_boxes'],
expected_raw_proposal_boxes) expected_raw_proposal_boxes)
self.assertAllClose(proposals_out['raw_detection_scores'], self.assertAllClose(proposals_out['raw_detection_scores'],
expected_raw_scores) expected_raw_scores)
@parameterized.parameters( @parameterized.named_parameters({
{'use_keras': True}, 'testcase_name': 'keras',
{'use_keras': False} 'use_keras': True
) }, {
'testcase_name': 'slim',
'use_keras': False
})
def test_postprocess_first_stage_only_train_mode(self, use_keras=False): def test_postprocess_first_stage_only_train_mode(self, use_keras=False):
self._test_postprocess_first_stage_only_train_mode(use_keras=use_keras) self._test_postprocess_first_stage_only_train_mode(use_keras=use_keras)
...@@ -1066,7 +1081,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1066,7 +1081,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
return (detections['num_detections'], detections['detection_boxes'], return (detections['num_detections'], detections['detection_boxes'],
detections['detection_scores'], detections['detection_classes'], detections['detection_scores'], detections['detection_classes'],
detections['raw_detection_boxes'], detections['raw_detection_boxes'],
detections['raw_detection_scores']) detections['raw_detection_scores'],
detections['detection_multiclass_scores'])
proposal_boxes = np.array( proposal_boxes = np.array(
[[[1, 1, 2, 3], [[[1, 1, 2, 3],
...@@ -1097,6 +1113,17 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1097,6 +1113,17 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
expected_num_detections = [5, 4] expected_num_detections = [5, 4]
expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]] expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]
expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]] expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]
expected_multiclass_scores = [[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]],
[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[0, 0, 0]]]
h = float(image_shape[1]) h = float(image_shape[1])
w = float(image_shape[2]) w = float(image_shape[2])
expected_raw_detection_boxes = np.array( expected_raw_detection_boxes = np.array(
...@@ -1114,6 +1141,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1114,6 +1141,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
expected_detection_scores[indx][0:num_proposals]) expected_detection_scores[indx][0:num_proposals])
self.assertAllClose(results[3][indx][0:num_proposals], self.assertAllClose(results[3][indx][0:num_proposals],
expected_detection_classes[indx][0:num_proposals]) expected_detection_classes[indx][0:num_proposals])
self.assertAllClose(results[6][indx][0:num_proposals],
expected_multiclass_scores[indx][0:num_proposals])
self.assertAllClose(results[4], expected_raw_detection_boxes) self.assertAllClose(results[4], expected_raw_detection_boxes)
self.assertAllClose(results[5], self.assertAllClose(results[5],
...@@ -1895,8 +1924,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1895,8 +1924,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
number_of_stages=2, second_stage_batch_size=6) number_of_stages=2, second_stage_batch_size=6)
inputs_shape = (2, 20, 20, 3) inputs_shape = (2, 20, 20, 3)
inputs = tf.to_float(tf.random_uniform( inputs = tf.cast(tf.random_uniform(
inputs_shape, minval=0, maxval=255, dtype=tf.int32)) inputs_shape, minval=0, maxval=255, dtype=tf.int32), dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
...@@ -1921,8 +1950,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1921,8 +1950,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
is_training=False, use_keras=use_keras, is_training=False, use_keras=use_keras,
number_of_stages=2, second_stage_batch_size=6) number_of_stages=2, second_stage_batch_size=6)
inputs_shape = (2, 20, 20, 3) inputs_shape = (2, 20, 20, 3)
inputs = tf.to_float(tf.random_uniform( inputs = tf.cast(tf.random_uniform(
inputs_shape, minval=0, maxval=255, dtype=tf.int32)) inputs_shape, minval=0, maxval=255, dtype=tf.int32), dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
...@@ -1942,8 +1971,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1942,8 +1971,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
second_stage_batch_size=6, num_classes=42) second_stage_batch_size=6, num_classes=42)
inputs_shape2 = (2, 20, 20, 3) inputs_shape2 = (2, 20, 20, 3)
inputs2 = tf.to_float(tf.random_uniform( inputs2 = tf.cast(tf.random_uniform(
inputs_shape2, minval=0, maxval=255, dtype=tf.int32)) inputs_shape2, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs2, true_image_shapes = model2.preprocess(inputs2) preprocessed_inputs2, true_image_shapes = model2.preprocess(inputs2)
prediction_dict2 = model2.predict(preprocessed_inputs2, true_image_shapes) prediction_dict2 = model2.predict(preprocessed_inputs2, true_image_shapes)
model2.postprocess(prediction_dict2, true_image_shapes) model2.postprocess(prediction_dict2, true_image_shapes)
...@@ -1974,8 +2004,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1974,8 +2004,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
num_classes=42) num_classes=42)
inputs_shape = (2, 20, 20, 3) inputs_shape = (2, 20, 20, 3)
inputs = tf.to_float( inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32)) tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
......
...@@ -297,8 +297,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -297,8 +297,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
""" """
image_shape_2d = tf.tile(tf.expand_dims(image_shape[1:], 0), image_shape_2d = tf.tile(tf.expand_dims(image_shape[1:], 0),
[image_shape[0], 1]) [image_shape[0], 1])
proposal_boxes_normalized, _, num_proposals, _, _ = self._postprocess_rpn( (proposal_boxes_normalized, _, _, num_proposals, _,
rpn_box_encodings, rpn_objectness_predictions_with_background, _) = self._postprocess_rpn(rpn_box_encodings,
rpn_objectness_predictions_with_background,
anchors, image_shape_2d, true_image_shapes) anchors, image_shape_2d, true_image_shapes)
box_classifier_features = ( box_classifier_features = (
......
...@@ -509,9 +509,9 @@ class SSDMetaArch(model.DetectionModel): ...@@ -509,9 +509,9 @@ class SSDMetaArch(model.DetectionModel):
resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape( resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape(
preprocessed_images) preprocessed_images)
true_heights, true_widths, _ = tf.unstack( true_heights, true_widths, _ = tf.unstack(
tf.to_float(true_image_shapes), axis=1) tf.cast(true_image_shapes, dtype=tf.float32), axis=1)
padded_height = tf.to_float(resized_inputs_shape[1]) padded_height = tf.cast(resized_inputs_shape[1], dtype=tf.float32)
padded_width = tf.to_float(resized_inputs_shape[2]) padded_width = tf.cast(resized_inputs_shape[2], dtype=tf.float32)
return tf.stack( return tf.stack(
[ [
tf.zeros_like(true_heights), tf.zeros_like(true_heights),
...@@ -654,6 +654,9 @@ class SSDMetaArch(model.DetectionModel): ...@@ -654,6 +654,9 @@ class SSDMetaArch(model.DetectionModel):
detection boxes. detection boxes.
detection_scores: [batch, max_detections] tensor with scalar scores for detection_scores: [batch, max_detections] tensor with scalar scores for
post-processed detection boxes. post-processed detection boxes.
detection_multiclass_scores: [batch, max_detections,
num_classes_with_background] tensor with class score distribution for
post-processed detection boxes including background class if any.
detection_classes: [batch, max_detections] tensor with classes for detection_classes: [batch, max_detections] tensor with classes for
post-processed detection classes. post-processed detection classes.
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
...@@ -703,10 +706,13 @@ class SSDMetaArch(model.DetectionModel): ...@@ -703,10 +706,13 @@ class SSDMetaArch(model.DetectionModel):
feature_map_list.append(tf.reshape(feature_map, [batch_size, -1])) feature_map_list.append(tf.reshape(feature_map, [batch_size, -1]))
box_features = tf.concat(feature_map_list, 1) box_features = tf.concat(feature_map_list, 1)
box_features = tf.identity(box_features, 'raw_box_features') box_features = tf.identity(box_features, 'raw_box_features')
if detection_keypoints is not None:
additional_fields = { additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints} 'multiclass_scores': detection_scores_with_background
}
if detection_keypoints is not None:
detection_keypoints = tf.identity(
detection_keypoints, 'raw_keypoint_locations')
additional_fields[fields.BoxListFields.keypoints] = detection_keypoints
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections) = self._non_max_suppression_fn( nmsed_additional_fields, num_detections) = self._non_max_suppression_fn(
detection_boxes, detection_boxes,
...@@ -722,8 +728,10 @@ class SSDMetaArch(model.DetectionModel): ...@@ -722,8 +728,10 @@ class SSDMetaArch(model.DetectionModel):
nmsed_scores, nmsed_scores,
fields.DetectionResultFields.detection_classes: fields.DetectionResultFields.detection_classes:
nmsed_classes, nmsed_classes,
fields.DetectionResultFields.detection_multiclass_scores:
nmsed_additional_fields['multiclass_scores'],
fields.DetectionResultFields.num_detections: fields.DetectionResultFields.num_detections:
tf.to_float(num_detections), tf.cast(num_detections, dtype=tf.float32),
fields.DetectionResultFields.raw_detection_boxes: fields.DetectionResultFields.raw_detection_boxes:
tf.squeeze(detection_boxes, axis=2), tf.squeeze(detection_boxes, axis=2),
fields.DetectionResultFields.raw_detection_scores: fields.DetectionResultFields.raw_detection_scores:
...@@ -786,13 +794,13 @@ class SSDMetaArch(model.DetectionModel): ...@@ -786,13 +794,13 @@ class SSDMetaArch(model.DetectionModel):
if self._random_example_sampler: if self._random_example_sampler:
batch_cls_per_anchor_weights = tf.reduce_mean( batch_cls_per_anchor_weights = tf.reduce_mean(
batch_cls_weights, axis=-1) batch_cls_weights, axis=-1)
batch_sampled_indicator = tf.to_float( batch_sampled_indicator = tf.cast(
shape_utils.static_or_dynamic_map_fn( shape_utils.static_or_dynamic_map_fn(
self._minibatch_subsample_fn, self._minibatch_subsample_fn,
[batch_cls_targets, batch_cls_per_anchor_weights], [batch_cls_targets, batch_cls_per_anchor_weights],
dtype=tf.bool, dtype=tf.bool,
parallel_iterations=self._parallel_iterations, parallel_iterations=self._parallel_iterations,
back_prop=True)) back_prop=True), dtype=tf.float32)
batch_reg_weights = tf.multiply(batch_sampled_indicator, batch_reg_weights = tf.multiply(batch_sampled_indicator,
batch_reg_weights) batch_reg_weights)
batch_cls_weights = tf.multiply( batch_cls_weights = tf.multiply(
...@@ -868,7 +876,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -868,7 +876,8 @@ class SSDMetaArch(model.DetectionModel):
# Optionally normalize by number of positive matches # Optionally normalize by number of positive matches
normalizer = tf.constant(1.0, dtype=tf.float32) normalizer = tf.constant(1.0, dtype=tf.float32)
if self._normalize_loss_by_num_matches: if self._normalize_loss_by_num_matches:
normalizer = tf.maximum(tf.to_float(tf.reduce_sum(batch_reg_weights)), normalizer = tf.maximum(tf.cast(tf.reduce_sum(batch_reg_weights),
dtype=tf.float32),
1.0) 1.0)
localization_loss_normalizer = normalizer localization_loss_normalizer = normalizer
...@@ -883,8 +892,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -883,8 +892,8 @@ class SSDMetaArch(model.DetectionModel):
name='classification_loss') name='classification_loss')
loss_dict = { loss_dict = {
str(localization_loss.op.name): localization_loss, 'Loss/localization_loss': localization_loss,
str(classification_loss.op.name): classification_loss 'Loss/classification_loss': classification_loss
} }
...@@ -1025,17 +1034,35 @@ class SSDMetaArch(model.DetectionModel): ...@@ -1025,17 +1034,35 @@ class SSDMetaArch(model.DetectionModel):
with rows of the Match objects corresponding to groundtruth boxes with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors. and columns corresponding to anchors.
""" """
avg_num_gt_boxes = tf.reduce_mean(tf.to_float(tf.stack( avg_num_gt_boxes = tf.reduce_mean(
[tf.shape(x)[0] for x in groundtruth_boxes_list]))) tf.cast(
avg_num_matched_gt_boxes = tf.reduce_mean(tf.to_float(tf.stack( tf.stack([tf.shape(x)[0] for x in groundtruth_boxes_list]),
[match.num_matched_rows() for match in match_list]))) dtype=tf.float32))
avg_pos_anchors = tf.reduce_mean(tf.to_float(tf.stack( avg_num_matched_gt_boxes = tf.reduce_mean(
[match.num_matched_columns() for match in match_list]))) tf.cast(
avg_neg_anchors = tf.reduce_mean(tf.to_float(tf.stack( tf.stack([match.num_matched_rows() for match in match_list]),
[match.num_unmatched_columns() for match in match_list]))) dtype=tf.float32))
avg_ignored_anchors = tf.reduce_mean(tf.to_float(tf.stack( avg_pos_anchors = tf.reduce_mean(
[match.num_ignored_columns() for match in match_list]))) tf.cast(
tf.stack([match.num_matched_columns() for match in match_list]),
dtype=tf.float32))
avg_neg_anchors = tf.reduce_mean(
tf.cast(
tf.stack([match.num_unmatched_columns() for match in match_list]),
dtype=tf.float32))
avg_ignored_anchors = tf.reduce_mean(
tf.cast(
tf.stack([match.num_ignored_columns() for match in match_list]),
dtype=tf.float32))
# TODO(rathodv): Add a test for these summaries. # TODO(rathodv): Add a test for these summaries.
try:
# TODO(kaftan): Integrate these summaries into the v2 style loops
with tf.compat.v2.init_scope():
if tf.compat.v2.executing_eagerly():
return
except AttributeError:
pass
tf.summary.scalar('AvgNumGroundtruthBoxesPerImage', tf.summary.scalar('AvgNumGroundtruthBoxesPerImage',
avg_num_gt_boxes, avg_num_gt_boxes,
family='TargetAssignment') family='TargetAssignment')
......
...@@ -176,6 +176,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -176,6 +176,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
] ]
] # padding ] # padding
expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
expected_multiclass_scores = [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]]
expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
expected_num_detections = np.array([3, 3]) expected_num_detections = np.array([3, 3])
...@@ -198,6 +201,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -198,6 +201,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
detections = model.postprocess(prediction_dict, true_image_shapes) detections = model.postprocess(prediction_dict, true_image_shapes)
self.assertIn('detection_boxes', detections) self.assertIn('detection_boxes', detections)
self.assertIn('detection_scores', detections) self.assertIn('detection_scores', detections)
self.assertIn('detection_multiclass_scores', detections)
self.assertIn('detection_classes', detections) self.assertIn('detection_classes', detections)
self.assertIn('num_detections', detections) self.assertIn('num_detections', detections)
self.assertIn('raw_detection_boxes', detections) self.assertIn('raw_detection_boxes', detections)
...@@ -217,6 +221,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -217,6 +221,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
expected_boxes[image_idx])) expected_boxes[image_idx]))
self.assertAllClose(detections_out['detection_scores'], expected_scores) self.assertAllClose(detections_out['detection_scores'], expected_scores)
self.assertAllClose(detections_out['detection_classes'], expected_classes) self.assertAllClose(detections_out['detection_classes'], expected_classes)
self.assertAllClose(detections_out['detection_multiclass_scores'],
expected_multiclass_scores)
self.assertAllClose(detections_out['num_detections'], self.assertAllClose(detections_out['num_detections'],
expected_num_detections) expected_num_detections)
self.assertAllEqual(detections_out['raw_detection_boxes'], self.assertAllEqual(detections_out['raw_detection_boxes'],
...@@ -235,7 +241,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -235,7 +241,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
true_image_shapes) true_image_shapes)
detections = model.postprocess(prediction_dict, true_image_shapes) detections = model.postprocess(prediction_dict, true_image_shapes)
return (detections['detection_boxes'], detections['detection_scores'], return (detections['detection_boxes'], detections['detection_scores'],
detections['detection_classes'], detections['num_detections']) detections['detection_classes'], detections['num_detections'],
detections['detection_multiclass_scores'])
batch_size = 2 batch_size = 2
image_size = 2 image_size = 2
...@@ -257,11 +264,14 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -257,11 +264,14 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
] ]
] # padding ] # padding
expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]] expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]]
expected_multiclass_scores = [[[0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 0], [0, 0], [0, 0], [0, 0]]]
expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]] expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]]
expected_num_detections = np.array([3, 3]) expected_num_detections = np.array([3, 3])
(detection_boxes, detection_scores, detection_classes, (detection_boxes, detection_scores, detection_classes,
num_detections) = self.execute(graph_fn, [input_image]) num_detections, detection_multiclass_scores) = self.execute(graph_fn,
[input_image])
for image_idx in range(batch_size): for image_idx in range(batch_size):
self.assertTrue(test_utils.first_rows_close_as_set( self.assertTrue(test_utils.first_rows_close_as_set(
detection_boxes[image_idx][ detection_boxes[image_idx][
...@@ -270,6 +280,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -270,6 +280,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
self.assertAllClose( self.assertAllClose(
detection_scores[image_idx][0:expected_num_detections[image_idx]], detection_scores[image_idx][0:expected_num_detections[image_idx]],
expected_scores[image_idx][0:expected_num_detections[image_idx]]) expected_scores[image_idx][0:expected_num_detections[image_idx]])
self.assertAllClose(
detection_multiclass_scores[image_idx]
[0:expected_num_detections[image_idx]],
expected_multiclass_scores[image_idx]
[0:expected_num_detections[image_idx]])
self.assertAllClose( self.assertAllClose(
detection_classes[image_idx][0:expected_num_detections[image_idx]], detection_classes[image_idx][0:expected_num_detections[image_idx]],
expected_classes[image_idx][0:expected_num_detections[image_idx]]) expected_classes[image_idx][0:expected_num_detections[image_idx]])
...@@ -600,8 +615,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -600,8 +615,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
with test_graph_detection.as_default(): with test_graph_detection.as_default():
model, _, _, _ = self._create_model(use_keras=use_keras) model, _, _, _ = self._create_model(use_keras=use_keras)
inputs_shape = [2, 2, 2, 3] inputs_shape = [2, 2, 2, 3]
inputs = tf.to_float(tf.random_uniform( inputs = tf.cast(tf.random_uniform(
inputs_shape, minval=0, maxval=255, dtype=tf.int32)) inputs_shape, minval=0, maxval=255, dtype=tf.int32), dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
...@@ -620,8 +635,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -620,8 +635,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
with test_graph_detection.as_default(): with test_graph_detection.as_default():
model, _, _, _ = self._create_model(use_keras=use_keras) model, _, _, _ = self._create_model(use_keras=use_keras)
inputs_shape = [2, 2, 2, 3] inputs_shape = [2, 2, 2, 3]
inputs = tf.to_float( inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32)) tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
......
...@@ -98,13 +98,16 @@ def expected_calibration_error(y_true, y_pred, nbins=20): ...@@ -98,13 +98,16 @@ def expected_calibration_error(y_true, y_pred, nbins=20):
with tf.control_dependencies([bin_ids]): with tf.control_dependencies([bin_ids]):
update_bin_counts_op = tf.assign_add( update_bin_counts_op = tf.assign_add(
bin_counts, tf.to_float(tf.bincount(bin_ids, minlength=nbins))) bin_counts, tf.cast(tf.bincount(bin_ids, minlength=nbins),
dtype=tf.float32))
update_bin_true_sum_op = tf.assign_add( update_bin_true_sum_op = tf.assign_add(
bin_true_sum, bin_true_sum,
tf.to_float(tf.bincount(bin_ids, weights=y_true, minlength=nbins))) tf.cast(tf.bincount(bin_ids, weights=y_true, minlength=nbins),
dtype=tf.float32))
update_bin_preds_sum_op = tf.assign_add( update_bin_preds_sum_op = tf.assign_add(
bin_preds_sum, bin_preds_sum,
tf.to_float(tf.bincount(bin_ids, weights=y_pred, minlength=nbins))) tf.cast(tf.bincount(bin_ids, weights=y_pred, minlength=nbins),
dtype=tf.float32))
ece_update_op = _ece_from_bins( ece_update_op = _ece_from_bins(
update_bin_counts_op, update_bin_counts_op,
......
...@@ -216,29 +216,23 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -216,29 +216,23 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
for key, value in iter(box_metrics.items())} for key, value in iter(box_metrics.items())}
return box_metrics return box_metrics
def get_estimator_eval_metric_ops(self, eval_dict): def add_eval_dict(self, eval_dict):
"""Returns a dictionary of eval metric ops. """Observes an evaluation result dict for a single example.
Note that once value_op is called, the detections and groundtruth added via When executing eagerly, once all observations have been observed by this
update_op are cleared. method you can use `.evaluate()` to get the final metrics.
This function can take in groundtruth and detections for a batch of images, When using `tf.estimator.Estimator` for evaluation this function is used by
or for a single image. For the latter case, the batch dimension for input `get_estimator_eval_metric_ops()` to construct the metric update op.
tensors need not be present.
Args: Args:
eval_dict: A dictionary that holds tensors for evaluating object detection eval_dict: A dictionary that holds tensors for evaluating an object
performance. For single-image evaluation, this dictionary may be detection model, returned from
produced from eval_util.result_dict_for_single_example(). If multi-image eval_util.result_dict_for_single_example().
evaluation, `eval_dict` should contain the fields
'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
properly unpad the tensors from the batch.
Returns: Returns:
a dictionary of metric names to tuple of value_op and update_op that can None when executing eagerly, or an update_op that can be used to update
be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all the eval metrics in `tf.estimator.EstimatorSpec`.
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
""" """
def update_op( def update_op(
image_id_batched, image_id_batched,
...@@ -328,7 +322,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -328,7 +322,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
if is_annotated is None: if is_annotated is None:
is_annotated = tf.ones_like(image_id, dtype=tf.bool) is_annotated = tf.ones_like(image_id, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id, return tf.py_func(update_op, [image_id,
groundtruth_boxes, groundtruth_boxes,
groundtruth_classes, groundtruth_classes,
groundtruth_is_crowd, groundtruth_is_crowd,
...@@ -338,6 +332,32 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -338,6 +332,32 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes, detection_classes,
num_det_boxes_per_image, num_det_boxes_per_image,
is_annotated], []) is_annotated], [])
def get_estimator_eval_metric_ops(self, eval_dict):
"""Returns a dictionary of eval metric ops.
Note that once value_op is called, the detections and groundtruth added via
update_op are cleared.
This function can take in groundtruth and detections for a batch of images,
or for a single image. For the latter case, the batch dimension for input
tensors need not be present.
Args:
eval_dict: A dictionary that holds tensors for evaluating object detection
performance. For single-image evaluation, this dictionary may be
produced from eval_util.result_dict_for_single_example(). If multi-image
evaluation, `eval_dict` should contain the fields
'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
properly unpad the tensors from the batch.
Returns:
a dictionary of metric names to tuple of value_op and update_op that can
be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
"""
update_op = self.add_eval_dict(eval_dict)
metric_names = ['DetectionBoxes_Precision/mAP', metric_names = ['DetectionBoxes_Precision/mAP',
'DetectionBoxes_Precision/mAP@.50IOU', 'DetectionBoxes_Precision/mAP@.50IOU',
'DetectionBoxes_Precision/mAP@.75IOU', 'DetectionBoxes_Precision/mAP@.75IOU',
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
# ============================================================================== # ==============================================================================
r"""Runs evaluation using OpenImages groundtruth and predictions. r"""Runs evaluation using OpenImages groundtruth and predictions.
Uses the Open Images Challenge 2018 and 2019 metrics.
Example usage: Example usage:
python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \ python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \
--input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \ --input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \
...@@ -21,27 +23,50 @@ python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \ ...@@ -21,27 +23,50 @@ python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \
--input_class_labelmap=/path/to/input/class_labelmap.pbtxt \ --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
--input_predictions=/path/to/input/predictions.csv \ --input_predictions=/path/to/input/predictions.csv \
--output_metrics=/path/to/output/metric.csv \ --output_metrics=/path/to/output/metric.csv \
--input_annotations_segm=[/path/to/input/annotations-human-mask.csv] \
If the optional flag --input_annotations_segm is provided, a Mask column is also expected in the CSV files.
CSVs with bounding box annotations and image label (including the image URLs) CSVs with bounding box annotations, instance segmentations and image label
can be downloaded from the Open Images Challenge website: can be downloaded from the Open Images Challenge website:
https://storage.googleapis.com/openimages/web/challenge.html https://storage.googleapis.com/openimages/web/challenge.html
The format of the input csv and the metrics itself are described on the The format of the input csv and the metrics itself are described on the
challenge website. challenge website as well.
""" """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse from absl import app
from absl import flags
import pandas as pd import pandas as pd
from google.protobuf import text_format from google.protobuf import text_format
from object_detection.metrics import io_utils from object_detection.metrics import io_utils
from object_detection.metrics import oid_od_challenge_evaluation_utils as utils from object_detection.metrics import oid_challenge_evaluation_utils as utils
from object_detection.protos import string_int_label_map_pb2 from object_detection.protos import string_int_label_map_pb2
from object_detection.utils import object_detection_evaluation from object_detection.utils import object_detection_evaluation
# Command-line interface. All flags default to None; every flag except
# input_annotations_segm is marked required in main().
flags.DEFINE_string('input_annotations_boxes', None,
                    'File with groundtruth boxes annotations.')
flags.DEFINE_string('input_annotations_labels', None,
                    'File with groundtruth labels annotations.')
flags.DEFINE_string(
    'input_predictions', None,
    """File with detection predictions; NOTE: no postprocessing is applied in the evaluation script."""
)
flags.DEFINE_string('input_class_labelmap', None,
                    'Open Images Challenge labelmap.')
flags.DEFINE_string('output_metrics', None, 'Output file with csv metrics.')
flags.DEFINE_string(
    'input_annotations_segm', None,
    'File with groundtruth instance segmentation annotations [OPTIONAL].')

FLAGS = flags.FLAGS
def _load_labelmap(labelmap_path): def _load_labelmap(labelmap_path):
"""Loads labelmap from the labelmap path. """Loads labelmap from the labelmap path.
...@@ -66,26 +91,43 @@ def _load_labelmap(labelmap_path): ...@@ -66,26 +91,43 @@ def _load_labelmap(labelmap_path):
return labelmap_dict, categories return labelmap_dict, categories
def main(parsed_args): def main(unused_argv):
all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes) flags.mark_flag_as_required('input_annotations_boxes')
all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels) flags.mark_flag_as_required('input_annotations_labels')
flags.mark_flag_as_required('input_predictions')
flags.mark_flag_as_required('input_class_labelmap')
flags.mark_flag_as_required('output_metrics')
all_location_annotations = pd.read_csv(FLAGS.input_annotations_boxes)
all_label_annotations = pd.read_csv(FLAGS.input_annotations_labels)
all_label_annotations.rename( all_label_annotations.rename(
columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
all_annotations = pd.concat([all_box_annotations, all_label_annotations])
class_label_map, categories = _load_labelmap(parsed_args.input_class_labelmap) is_instance_segmentation_eval = False
if FLAGS.input_annotations_segm:
is_instance_segmentation_eval = True
all_segm_annotations = pd.read_csv(FLAGS.input_annotations_segm)
# Note: this part is unstable as it requires the float point numbers in both
# csvs are exactly the same;
# Will be replaced by more stable solution: merge on LabelName and ImageID
# and filter down by IoU.
all_location_annotations = utils.merge_boxes_and_masks(
all_location_annotations, all_segm_annotations)
all_annotations = pd.concat([all_location_annotations, all_label_annotations])
class_label_map, categories = _load_labelmap(FLAGS.input_class_labelmap)
challenge_evaluator = ( challenge_evaluator = (
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator( object_detection_evaluation.OpenImagesChallengeEvaluator(
categories)) categories, evaluate_masks=is_instance_segmentation_eval))
for _, groundtruth in enumerate(all_annotations.groupby('ImageID')): for _, groundtruth in enumerate(all_annotations.groupby('ImageID')):
image_id, image_groundtruth = groundtruth image_id, image_groundtruth = groundtruth
groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary( groundtruth_dictionary = utils.build_groundtruth_dictionary(
image_groundtruth, class_label_map) image_groundtruth, class_label_map)
challenge_evaluator.add_single_ground_truth_image_info( challenge_evaluator.add_single_ground_truth_image_info(
image_id, groundtruth_dictionary) image_id, groundtruth_dictionary)
all_predictions = pd.read_csv(parsed_args.input_predictions) all_predictions = pd.read_csv(FLAGS.input_predictions)
for _, prediction_data in enumerate(all_predictions.groupby('ImageID')): for _, prediction_data in enumerate(all_predictions.groupby('ImageID')):
image_id, image_predictions = prediction_data image_id, image_predictions = prediction_data
prediction_dictionary = utils.build_predictions_dictionary( prediction_dictionary = utils.build_predictions_dictionary(
...@@ -95,34 +137,9 @@ def main(parsed_args): ...@@ -95,34 +137,9 @@ def main(parsed_args):
metrics = challenge_evaluator.evaluate() metrics = challenge_evaluator.evaluate()
with open(parsed_args.output_metrics, 'w') as fid: with open(FLAGS.output_metrics, 'w') as fid:
io_utils.write_csv(fid, metrics) io_utils.write_csv(fid, metrics)
if __name__ == '__main__': if __name__ == '__main__':
app.run(main)
parser = argparse.ArgumentParser(
description='Evaluate Open Images Object Detection Challenge predictions.'
)
parser.add_argument(
'--input_annotations_boxes',
required=True,
help='File with groundtruth boxes annotations.')
parser.add_argument(
'--input_annotations_labels',
required=True,
help='File with groundtruth labels annotations')
parser.add_argument(
'--input_predictions',
required=True,
help="""File with detection predictions; NOTE: no postprocessing is
applied in the evaluation script.""")
parser.add_argument(
'--input_class_labelmap',
required=True,
help='Open Images Challenge labelmap.')
parser.add_argument(
'--output_metrics', required=True, help='Output file with csv metrics')
args = parser.parse_args()
main(args)
...@@ -12,17 +12,92 @@ ...@@ -12,17 +12,92 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format. r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format."""
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
import pandas as pd
from pycocotools import mask
from object_detection.core import standard_fields from object_detection.core import standard_fields
def build_groundtruth_boxes_dictionary(data, class_label_map): def _to_normalized_box(mask_np):
"""Decodes binary segmentation masks into np.arrays and boxes.
Args:
mask_np: np.ndarray of size NxWxH.
Returns:
a np.ndarray of the size Nx4, each row containing normalized coordinates
[YMin, XMin, YMax, XMax] of a box computed of axis parallel enclosing box of
a mask.
"""
coord1, coord2 = np.nonzero(mask_np)
if coord1.size > 0:
ymin = float(min(coord1)) / mask_np.shape[0]
ymax = float(max(coord1) + 1) / mask_np.shape[0]
xmin = float(min(coord2)) / mask_np.shape[1]
xmax = float((max(coord2) + 1)) / mask_np.shape[1]
return np.array([ymin, xmin, ymax, xmax])
else:
return np.array([0.0, 0.0, 0.0, 0.0])
def _decode_raw_data_into_masks_and_boxes(segments, image_widths,
image_heights):
"""Decods binary segmentation masks into np.arrays and boxes.
Args:
segments: pandas Series object containing either None entries or strings
with COCO-encoded binary masks. All masks are expected to be the same size.
image_widths: pandas Series of mask widths.
image_heights: pandas Series of mask heights.
Returns:
a np.ndarray of the size NxWxH, where W and H is determined from the encoded
masks; for the None values, zero arrays of size WxH are created. if input
contains only None values, W=1, H=1.
"""
segment_masks = []
segment_boxes = []
ind = segments.first_valid_index()
if ind is not None:
size = [int(image_heights.iloc[ind]), int(image_widths[ind])]
else:
# It does not matter which size we pick since no masks will ever be
# evaluated.
size = [1, 1]
for segment, im_width, im_height in zip(segments, image_widths,
image_heights):
if pd.isnull(segment):
segment_masks.append(np.zeros([1, size[0], size[1]], dtype=np.uint8))
segment_boxes.append(np.expand_dims(np.array([0.0, 0.0, 0.0, 0.0]), 0))
else:
encoding_dict = {'size': [im_height, im_width], 'counts': segment}
mask_tensor = mask.decode(encoding_dict)
segment_masks.append(np.expand_dims(mask_tensor, 0))
segment_boxes.append(np.expand_dims(_to_normalized_box(mask_tensor), 0))
return np.concatenate(
segment_masks, axis=0), np.concatenate(
segment_boxes, axis=0)
def merge_boxes_and_masks(box_data, mask_data):
  """Outer-joins box annotations with mask annotations.

  Rows are matched on image, label and exact box coordinates; unmatched rows
  from either side are kept (outer join), with missing columns left as NaN.

  Args:
    box_data: pandas DataFrame with bounding-box annotation rows.
    mask_data: pandas DataFrame with instance-mask annotation rows.

  Returns:
    pandas DataFrame with the merged annotations.
  """
  join_keys = ['LabelName', 'ImageID', 'XMin', 'XMax', 'YMin', 'YMax',
               'IsGroupOf']
  return box_data.merge(mask_data, how='outer', on=join_keys)
def build_groundtruth_dictionary(data, class_label_map):
"""Builds a groundtruth dictionary from groundtruth data in CSV file. """Builds a groundtruth dictionary from groundtruth data in CSV file.
Args: Args:
...@@ -44,21 +119,31 @@ def build_groundtruth_boxes_dictionary(data, class_label_map): ...@@ -44,21 +119,31 @@ def build_groundtruth_boxes_dictionary(data, class_label_map):
M numpy boolean array denoting whether a groundtruth box contains a M numpy boolean array denoting whether a groundtruth box contains a
group of instances. group of instances.
""" """
data_boxes = data[data.ConfidenceImageLabel.isnull()] data_location = data[data.XMin.notnull()]
data_labels = data[data.XMin.isnull()] data_labels = data[data.ConfidenceImageLabel.notnull()]
return { dictionary = {
standard_fields.InputDataFields.groundtruth_boxes: standard_fields.InputDataFields.groundtruth_boxes:
data_boxes[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
standard_fields.InputDataFields.groundtruth_classes: standard_fields.InputDataFields.groundtruth_classes:
data_boxes['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), data_location['LabelName'].map(lambda x: class_label_map[x]
).as_matrix(),
standard_fields.InputDataFields.groundtruth_group_of: standard_fields.InputDataFields.groundtruth_group_of:
data_boxes['IsGroupOf'].as_matrix().astype(int), data_location['IsGroupOf'].as_matrix().astype(int),
standard_fields.InputDataFields.groundtruth_image_classes: standard_fields.InputDataFields.groundtruth_image_classes:
data_labels['LabelName'].map(lambda x: class_label_map[x]) data_labels['LabelName'].map(lambda x: class_label_map[x]
.as_matrix(), ).as_matrix(),
} }
if 'Mask' in data_location:
segments, _ = _decode_raw_data_into_masks_and_boxes(
data_location['Mask'], data_location['ImageWidth'],
data_location['ImageHeight'])
dictionary[
standard_fields.InputDataFields.groundtruth_instance_masks] = segments
return dictionary
def build_predictions_dictionary(data, class_label_map): def build_predictions_dictionary(data, class_label_map):
"""Builds a predictions dictionary from predictions data in CSV file. """Builds a predictions dictionary from predictions data in CSV file.
...@@ -80,11 +165,21 @@ def build_predictions_dictionary(data, class_label_map): ...@@ -80,11 +165,21 @@ def build_predictions_dictionary(data, class_label_map):
the boxes. the boxes.
""" """
return { dictionary = {
standard_fields.DetectionResultFields.detection_boxes:
data[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
standard_fields.DetectionResultFields.detection_classes: standard_fields.DetectionResultFields.detection_classes:
data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(),
standard_fields.DetectionResultFields.detection_scores: standard_fields.DetectionResultFields.detection_scores:
data['Score'].as_matrix() data['Score'].as_matrix()
} }
if 'Mask' in data:
segments, boxes = _decode_raw_data_into_masks_and_boxes(
data['Mask'], data['ImageWidth'], data['ImageHeight'])
dictionary[standard_fields.DetectionResultFields.detection_masks] = segments
dictionary[standard_fields.DetectionResultFields.detection_boxes] = boxes
else:
dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
'YMin', 'XMin', 'YMax', 'XMax'
]].as_matrix()
return dictionary
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for oid_od_challenge_evaluation_util."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
from pycocotools import mask
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.metrics import oid_challenge_evaluation_utils as utils
class OidUtilTest(tf.test.TestCase):
  """Unit tests for the low-level mask helpers."""

  def testMaskToNormalizedBox(self):
    # Each case pairs a binary mask with its expected normalized
    # [YMin, XMin, YMax, XMax] axis-aligned enclosing box.
    cases = [
        (np.array([[0, 0, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0]]),
         np.array([0.25, 0.25, 0.75, 0.5])),
        (np.array([[0, 0, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1], [0, 1, 1, 1]]),
         np.array([0.25, 0.25, 1.0, 1.0])),
        # An all-zero mask maps to the degenerate all-zero box.
        (np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
         np.array([0.0, 0.0, 0.0, 0.0])),
    ]
    for mask_np, expected_box in cases:
      self.assertAllEqual(expected_box, utils._to_normalized_box(mask_np))

  def testDecodeToTensors(self):
    # One mask with foreground pixels and one empty mask.
    foreground = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0]],
                          dtype=np.uint8)
    background = np.zeros((3, 4), dtype=np.uint8)
    encodings = [
        mask.encode(np.asfortranarray(m)) for m in (foreground, background)
    ]
    counts = pd.Series([e['counts'] for e in encodings])
    widths = pd.Series([foreground.shape[1], background.shape[1]])
    heights = pd.Series([foreground.shape[0], background.shape[0]])

    segm, bbox = utils._decode_raw_data_into_masks_and_boxes(
        counts, widths, heights)

    self.assertAllEqual(np.stack([foreground, background]), segm)
    # Empty masks produce an all-zero normalized box.
    self.assertAllEqual(
        np.array([[0.0, 0.5, 2.0 / 3.0, 1.0], [0, 0, 0, 0]]), bbox)
class OidChallengeEvaluationUtilTest(tf.test.TestCase):
  """Tests for the CSV -> OpenImagesChallengeEvaluator input converters."""

  def testBuildGroundtruthDictionaryBoxes(self):
    # Rows with box coordinates are box groundtruth; rows with only
    # ConfidenceImageLabel set are image-level label groundtruth.
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None],
         ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1],
         ['fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 0],
         ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf',
            'ConfidenceImageLabel'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    groundtruth_dictionary = utils.build_groundtruth_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.InputDataFields.groundtruth_boxes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_group_of,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes,
                  groundtruth_dictionary)

    self.assertAllEqual(
        np.array([1, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_classes])
    self.assertAllEqual(
        np.array([1, 0]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_group_of])

    # Boxes come back reordered to [YMin, XMin, YMax, XMax].
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]])
    self.assertNDArrayNear(
        expected_boxes_data, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_boxes], 1e-5)
    self.assertAllEqual(
        np.array([1, 2, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_image_classes])

  def testBuildPredictionDictionaryBoxes(self):
    # Box-only predictions: no Mask column present in the CSV data.
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2],
         ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    prediction_dictionary = utils.build_predictions_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.DetectionResultFields.detection_boxes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_classes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_scores,
                  prediction_dictionary)

    self.assertAllEqual(
        np.array([1, 3, 1]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_classes])
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2],
                                    [0.2, 0.0, 0.3, 0.1]])
    self.assertNDArrayNear(
        expected_boxes_data, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_boxes], 1e-5)
    self.assertNDArrayNear(
        np.array([0.1, 0.2, 0.3]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_scores], 1e-5)

  def testBuildGroundtruthDictionaryMasks(self):
    # Mixes: a row with a real mask, a box-only row (no mask — should get a
    # zero-mask placeholder), a row with an empty mask, and image-label rows.
    mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    encoding1 = mask.encode(np.asfortranarray(mask1))
    encoding2 = mask.encode(np.asfortranarray(mask2))
    np_data = pd.DataFrame(
        [[
            'fe58ec1b06db2bb7', mask1.shape[1], mask1.shape[0], '/m/04bcr3',
            0.0, 0.3, 0.5, 0.6, 0, None, encoding1['counts']
        ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/02gy9n', 0.1, 0.2, 0.3, 0.4,
             1, None, None
         ],
         [
             'fe58ec1b06db2bb7', mask2.shape[1], mask2.shape[0], '/m/02gy9n',
             0.5, 0.6, 0.8, 0.9, 0, None, encoding2['counts']
         ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/04bcr3', None, None, None,
             None, None, 1, None
         ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/083vt', None, None, None,
             None, None, 0, None
         ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/02gy9n', None, None, None,
             None, None, 1, None
         ]],
        columns=[
            'ImageID', 'ImageWidth', 'ImageHeight', 'LabelName', 'XMin',
            'XMax', 'YMin', 'YMax', 'IsGroupOf', 'ConfidenceImageLabel', 'Mask'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    groundtruth_dictionary = utils.build_groundtruth_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.InputDataFields.groundtruth_boxes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_group_of,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_instance_masks,
                  groundtruth_dictionary)

    self.assertAllEqual(
        np.array([1, 3, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_classes])
    self.assertAllEqual(
        np.array([0, 1, 0]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_group_of])

    # Groundtruth boxes keep the CSV coordinates (not derived from masks).
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2],
                                    [0.8, 0.5, 0.9, 0.6]])
    self.assertNDArrayNear(
        expected_boxes_data, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_boxes], 1e-5)
    self.assertAllEqual(
        np.array([1, 2, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_image_classes])

    # The mask-less box row yields an all-zero 4x4 placeholder mask.
    expected_segm = np.concatenate([
        np.expand_dims(mask1, 0),
        np.zeros((1, 4, 4), dtype=np.uint8),
        np.expand_dims(mask2, 0)
    ],
                                   axis=0)
    self.assertAllEqual(
        expected_segm, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_instance_masks])

  def testBuildPredictionDictionaryMasks(self):
    # For mask predictions, detection boxes are *computed* from the masks
    # rather than read from the CSV.
    mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    encoding1 = mask.encode(np.asfortranarray(mask1))
    encoding2 = mask.encode(np.asfortranarray(mask2))
    np_data = pd.DataFrame(
        [[
            'fe58ec1b06db2bb7', mask1.shape[1], mask1.shape[0], '/m/04bcr3',
            encoding1['counts'], 0.8
        ],
         [
             'fe58ec1b06db2bb7', mask2.shape[1], mask2.shape[0], '/m/02gy9n',
             encoding2['counts'], 0.6
         ]],
        columns=[
            'ImageID', 'ImageWidth', 'ImageHeight', 'LabelName', 'Mask',
            'Score'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/02gy9n': 3}
    prediction_dictionary = utils.build_predictions_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.DetectionResultFields.detection_boxes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_classes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_scores,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_masks,
                  prediction_dictionary)

    self.assertAllEqual(
        np.array([1, 3]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_classes])
    # Box for the empty mask collapses to [0, 0, 0, 0].
    expected_boxes_data = np.array([[0.0, 0.5, 0.5, 1.0], [0, 0, 0, 0]])
    self.assertNDArrayNear(
        expected_boxes_data, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_boxes], 1e-5)
    self.assertNDArrayNear(
        np.array([0.8, 0.6]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_scores], 1e-5)
    expected_segm = np.concatenate(
        [np.expand_dims(mask1, 0),
         np.expand_dims(mask2, 0)], axis=0)
    self.assertAllEqual(
        expected_segm, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_masks])
if __name__ == '__main__':
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for oid_od_challenge_evaluation_util."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.metrics import oid_od_challenge_evaluation_utils as utils
class OidOdChallengeEvaluationUtilTest(tf.test.TestCase):
  """Tests for the box-only CSV -> evaluator input converters."""

  def testBuildGroundtruthDictionary(self):
    # Rows with box coordinates are box groundtruth; rows with only
    # ConfidenceImageLabel set are image-level label groundtruth.
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None],
         ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1],
         ['fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 0],
         ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf',
            'ConfidenceImageLabel'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary(
        np_data, class_label_map)

    # assertIn gives clearer failure messages than assertTrue(x in y) and
    # matches the style of the other OID evaluation-util tests.
    self.assertIn(standard_fields.InputDataFields.groundtruth_boxes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_group_of,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes,
                  groundtruth_dictionary)

    self.assertAllEqual(
        np.array([1, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_classes])
    self.assertAllEqual(
        np.array([1, 0]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_group_of])

    # Boxes come back reordered to [YMin, XMin, YMax, XMax].
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]])
    self.assertNDArrayNear(
        expected_boxes_data, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_boxes], 1e-5)
    self.assertAllEqual(
        np.array([1, 2, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_image_classes])

  def testBuildPredictionDictionary(self):
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2],
         ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    prediction_dictionary = utils.build_predictions_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.DetectionResultFields.detection_boxes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_classes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_scores,
                  prediction_dictionary)

    self.assertAllEqual(
        np.array([1, 3, 1]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_classes])
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2],
                                    [0.2, 0.0, 0.3, 0.1]])
    self.assertNDArrayNear(
        expected_boxes_data, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_boxes], 1e-5)
    self.assertNDArrayNear(
        np.array([0.1, 0.2, 0.3]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_scores], 1e-5)
if __name__ == '__main__':
tf.test.main()
...@@ -17,7 +17,7 @@ r"""Runs evaluation using OpenImages groundtruth and predictions. ...@@ -17,7 +17,7 @@ r"""Runs evaluation using OpenImages groundtruth and predictions.
Example usage: Example usage:
python \ python \
models/research/object_detection/metrics/oid_vrd_challenge_evaluation.py \ models/research/object_detection/metrics/oid_vrd_challenge_evaluation.py \
--input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \ --input_annotations_vrd=/path/to/input/annotations-human-bbox.csv \
--input_annotations_labels=/path/to/input/annotations-label.csv \ --input_annotations_labels=/path/to/input/annotations-label.csv \
--input_class_labelmap=/path/to/input/class_labelmap.pbtxt \ --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
--input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \ --input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \
...@@ -126,7 +126,7 @@ if __name__ == '__main__': ...@@ -126,7 +126,7 @@ if __name__ == '__main__':
description= description=
'Evaluate Open Images Visual Relationship Detection predictions.') 'Evaluate Open Images Visual Relationship Detection predictions.')
parser.add_argument( parser.add_argument(
'--input_annotations_boxes', '--input_annotations_vrd',
required=True, required=True,
help='File with groundtruth vrd annotations.') help='File with groundtruth vrd annotations.')
parser.add_argument( parser.add_argument(
......
...@@ -187,6 +187,46 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): ...@@ -187,6 +187,46 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
return unbatched_tensor_dict return unbatched_tensor_dict
def _provide_groundtruth(model, labels):
  """Feeds the groundtruth in `labels` to a detection model.

  Reads the required boxes and classes plus the optional instance masks,
  keypoints, weights, confidences, and is_crowd fields out of the labels
  dict and forwards them to `model.provide_groundtruth`. Optional fields
  that are absent from `labels` are passed as None.

  Args:
    model: The detection model to provide groundtruth to.
    labels: The labels for the training or evaluation inputs, keyed by
      `fields.InputDataFields` names.
  """
  input_fields = fields.InputDataFields
  model.provide_groundtruth(
      groundtruth_boxes_list=labels[input_fields.groundtruth_boxes],
      groundtruth_classes_list=labels[input_fields.groundtruth_classes],
      # dict.get yields None for missing optional fields, matching the
      # model API's "not provided" convention.
      groundtruth_confidences_list=labels.get(
          input_fields.groundtruth_confidences),
      groundtruth_masks_list=labels.get(
          input_fields.groundtruth_instance_masks),
      groundtruth_keypoints_list=labels.get(
          input_fields.groundtruth_keypoints),
      groundtruth_weights_list=labels.get(input_fields.groundtruth_weights),
      groundtruth_is_crowd_list=labels.get(input_fields.groundtruth_is_crowd))
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
postprocess_on_cpu=False): postprocess_on_cpu=False):
"""Creates a model function for `Estimator`. """Creates a model function for `Estimator`.
...@@ -247,33 +287,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -247,33 +287,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] _provide_groundtruth(detection_model, labels)
gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
gt_masks_list = None
if fields.InputDataFields.groundtruth_instance_masks in labels:
gt_masks_list = labels[
fields.InputDataFields.groundtruth_instance_masks]
gt_keypoints_list = None
if fields.InputDataFields.groundtruth_keypoints in labels:
gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
gt_confidences_list = None
if fields.InputDataFields.groundtruth_confidences in labels:
gt_confidences_list = labels[
fields.InputDataFields.groundtruth_confidences]
gt_is_crowd_list = None
if fields.InputDataFields.groundtruth_is_crowd in labels:
gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
detection_model.provide_groundtruth(
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
groundtruth_confidences_list=gt_confidences_list,
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list)
preprocessed_images = features[fields.InputDataFields.image] preprocessed_images = features[fields.InputDataFields.image]
if use_tpu and train_config.use_bfloat16: if use_tpu and train_config.use_bfloat16:
......
...@@ -225,6 +225,9 @@ class _LayersOverride(object): ...@@ -225,6 +225,9 @@ class _LayersOverride(object):
placeholder_with_default = tf.placeholder_with_default( placeholder_with_default = tf.placeholder_with_default(
input=input_tensor, shape=[None] + shape) input=input_tensor, shape=[None] + shape)
if tf.executing_eagerly():
return tf.keras.layers.Input(shape=shape)
else:
return tf.keras.layers.Input(tensor=placeholder_with_default) return tf.keras.layers.Input(tensor=placeholder_with_default)
# pylint: disable=unused-argument # pylint: disable=unused-argument
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
import functools import functools
import tensorflow as tf import tensorflow as tf
from object_detection.core import box_predictor from object_detection.core import box_predictor
from object_detection.utils import shape_utils
from object_detection.utils import static_shape from object_detection.utils import static_shape
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -350,7 +351,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor): ...@@ -350,7 +351,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
'feature maps, found: {}'.format( 'feature maps, found: {}'.format(
num_predictions_per_location_list)) num_predictions_per_location_list))
feature_channels = [ feature_channels = [
image_feature.shape[3].value for image_feature in image_features shape_utils.get_dim_as_int(image_feature.shape[3])
for image_feature in image_features
] ]
has_different_feature_channels = len(set(feature_channels)) > 1 has_different_feature_channels = len(set(feature_channels)) > 1
if has_different_feature_channels: if has_different_feature_channels:
......
...@@ -19,6 +19,7 @@ import collections ...@@ -19,6 +19,7 @@ import collections
import tensorflow as tf import tensorflow as tf
from object_detection.core import box_predictor from object_detection.core import box_predictor
from object_detection.utils import shape_utils
from object_detection.utils import static_shape from object_detection.utils import static_shape
keras = tf.keras.layers keras = tf.keras.layers
...@@ -371,7 +372,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -371,7 +372,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
def build(self, input_shapes): def build(self, input_shapes):
"""Creates the variables of the layer.""" """Creates the variables of the layer."""
feature_channels = [ feature_channels = [
input_shape[3].value for input_shape in input_shapes shape_utils.get_dim_as_int(input_shape[3])
for input_shape in input_shapes
] ]
has_different_feature_channels = len(set(feature_channels)) > 1 has_different_feature_channels = len(set(feature_channels)) > 1
if has_different_feature_channels: if has_different_feature_channels:
......
...@@ -24,6 +24,7 @@ import tensorflow as tf ...@@ -24,6 +24,7 @@ import tensorflow as tf
from object_detection.predictors.heads import head from object_detection.predictors.heads import head
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
class ConvolutionalMaskHead(head.KerasHead): class ConvolutionalMaskHead(head.KerasHead):
...@@ -254,8 +255,10 @@ class MaskRCNNMaskHead(head.KerasHead): ...@@ -254,8 +255,10 @@ class MaskRCNNMaskHead(head.KerasHead):
if self._convolve_then_upsample: if self._convolve_then_upsample:
# Replace Transposed Convolution with a Nearest Neighbor upsampling step # Replace Transposed Convolution with a Nearest Neighbor upsampling step
# followed by 3x3 convolution. # followed by 3x3 convolution.
height_scale = self._mask_height / input_shapes[1].value height_scale = self._mask_height / shape_utils.get_dim_as_int(
width_scale = self._mask_width / input_shapes[2].value input_shapes[1])
width_scale = self._mask_width / shape_utils.get_dim_as_int(
input_shapes[2])
# pylint: disable=g-long-lambda # pylint: disable=g-long-lambda
self._mask_predictor_layers.append(tf.keras.layers.Lambda( self._mask_predictor_layers.append(tf.keras.layers.Lambda(
lambda features: ops.nearest_neighbor_upsampling( lambda features: ops.nearest_neighbor_upsampling(
......
...@@ -128,7 +128,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor): ...@@ -128,7 +128,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor):
crop_size=self._crop_size, crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True) global_pool=True)
box_encodings = tf.squeeze(box_encodings, squeeze_dims=[2, 3]) box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
box_encodings = tf.reshape(box_encodings, box_encodings = tf.reshape(box_encodings,
[batch_size * num_boxes, 1, self.num_classes, [batch_size * num_boxes, 1, self.num_classes,
self._box_code_size]) self._box_code_size])
...@@ -149,7 +149,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor): ...@@ -149,7 +149,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor):
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True)) global_pool=True))
class_predictions_with_background = tf.squeeze( class_predictions_with_background = tf.squeeze(
class_predictions_with_background, squeeze_dims=[2, 3]) class_predictions_with_background, axis=[2, 3])
class_predictions_with_background = tf.reshape( class_predictions_with_background = tf.reshape(
class_predictions_with_background, class_predictions_with_background,
[batch_size * num_boxes, 1, total_classes]) [batch_size * num_boxes, 1, total_classes])
......
...@@ -176,7 +176,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -176,7 +176,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor):
crop_size=self._crop_size, crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True) global_pool=True)
box_encodings = tf.squeeze(box_encodings, squeeze_dims=[2, 3]) box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
box_encodings = tf.reshape(box_encodings, box_encodings = tf.reshape(box_encodings,
[batch_size * num_boxes, 1, self.num_classes, [batch_size * num_boxes, 1, self.num_classes,
self._box_code_size]) self._box_code_size])
...@@ -193,7 +193,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -193,7 +193,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor):
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True)) global_pool=True))
class_predictions_with_background = tf.squeeze( class_predictions_with_background = tf.squeeze(
class_predictions_with_background, squeeze_dims=[2, 3]) class_predictions_with_background, axis=[2, 3])
class_predictions_with_background = tf.reshape( class_predictions_with_background = tf.reshape(
class_predictions_with_background, class_predictions_with_background,
[batch_size * num_boxes, 1, self._total_classes]) [batch_size * num_boxes, 1, self._total_classes])
......
...@@ -76,4 +76,8 @@ message EvalConfig { ...@@ -76,4 +76,8 @@ message EvalConfig {
// If True, additionally include per-category metrics. // If True, additionally include per-category metrics.
optional bool include_metrics_per_category = 24 [default=false]; optional bool include_metrics_per_category = 24 [default=false];
// Recall range within which precision should be computed.
optional float recall_lower_bound = 26 [default = 0.0];
optional float recall_upper_bound = 27 [default = 1.0];
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment