Unverified Commit 9bbf8015 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Merged commit includes the following changes: (#6932)

250447559  by Zhichao Lu:

    Update expected files format for Instance Segmentation challenge:
    - add fields ImageWidth, ImageHeight and store the values per prediction
    - as mask, store only encoded image and assume its size is ImageWidth x ImageHeight

--
250402780  by rathodv:

    Fix failing Mask R-CNN TPU convergence test.

    Cast second stage prediction tensors from bfloat16 to float32 to prevent errors in third target assignment (Mask Prediction) - Concat with different types bfloat16 and float32 isn't allowed.

--
250300240  by Zhichao Lu:

    Adding Open Images Challenge 2019 object detection and instance segmentation
    support into the Estimator framework.

--
249944839  by rathodv:

    Modify exporter.py to add multiclass score nodes in exported inference graphs.

--
249935201  by rathodv:

    Modify postprocess methods to preserve multiclass scores after non max suppression.

--
249878079  by Zhich...
parent f42fddee
...@@ -271,7 +271,8 @@ class FasterRCNNMetaArchTest( ...@@ -271,7 +271,8 @@ class FasterRCNNMetaArchTest(
set(tensor_dict_out.keys()), set(tensor_dict_out.keys()),
set(expected_shapes.keys()).union( set(expected_shapes.keys()).union(
set([ set([
'detection_boxes', 'detection_scores', 'detection_classes', 'detection_boxes', 'detection_scores',
'detection_multiclass_scores', 'detection_classes',
'detection_masks', 'num_detections', 'mask_predictions', 'detection_masks', 'num_detections', 'mask_predictions',
'raw_detection_boxes', 'raw_detection_scores' 'raw_detection_boxes', 'raw_detection_scores'
]))) ])))
......
...@@ -967,7 +967,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -967,7 +967,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
[[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]] [[0, 0, .5, .5], [.5, .5, 1, 1]], [[0, .5, .5, 1], [.5, 0, 1, .5]]]
expected_proposal_scores = [[1, 1], expected_proposal_scores = [[1, 1],
[1, 1]] [1, 1]]
expected_num_proposals = [2, 2] expected_proposal_multiclass_scores = [[[0., 1.], [0., 1.]],
[[0., 1.], [0., 1.]]]
expected_raw_proposal_boxes = [[[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.], expected_raw_proposal_boxes = [[[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.],
[0.5, 0., 1., 0.5], [0.5, 0.5, 1., 1.]], [0.5, 0., 1., 0.5], [0.5, 0.5, 1., 1.]],
[[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.], [[0., 0., 0.5, 0.5], [0., 0.5, 0.5, 1.],
...@@ -975,31 +976,45 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -975,31 +976,45 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
expected_raw_scores = [[[0., 1.], [0., 1.], [0., 1.], [0., 1.]], expected_raw_scores = [[[0., 1.], [0., 1.], [0., 1.], [0., 1.]],
[[0., 1.], [0., 1.], [0., 1.], [0., 1.]]] [[0., 1.], [0., 1.], [0., 1.], [0., 1.]]]
expected_output_keys = set([ expected_output_keys = set([
'detection_boxes', 'detection_scores', 'num_detections', 'detection_boxes', 'detection_scores', 'detection_multiclass_scores',
'raw_detection_boxes', 'raw_detection_scores' 'num_detections', 'raw_detection_boxes', 'raw_detection_scores'
]) ])
self.assertEqual(set(proposals.keys()), expected_output_keys) self.assertEqual(set(proposals.keys()), expected_output_keys)
with self.test_session() as sess: with self.test_session() as sess:
proposals_out = sess.run(proposals) proposals_out = sess.run(proposals)
for image_idx in range(batch_size): for image_idx in range(batch_size):
num_detections = int(proposals_out['num_detections'][image_idx])
boxes = proposals_out['detection_boxes'][
image_idx][:num_detections, :].tolist()
scores = proposals_out['detection_scores'][
image_idx][:num_detections].tolist()
multiclass_scores = proposals_out['detection_multiclass_scores'][
image_idx][:num_detections, :].tolist()
expected_boxes = expected_proposal_boxes[image_idx]
expected_scores = expected_proposal_scores[image_idx]
expected_multiclass_scores = expected_proposal_multiclass_scores[
image_idx]
self.assertTrue( self.assertTrue(
test_utils.first_rows_close_as_set( test_utils.first_rows_close_as_set(boxes, expected_boxes))
proposals_out['detection_boxes'][image_idx].tolist(), self.assertTrue(
expected_proposal_boxes[image_idx])) test_utils.first_rows_close_as_set(scores, expected_scores))
self.assertAllClose(proposals_out['detection_scores'], self.assertTrue(
expected_proposal_scores) test_utils.first_rows_close_as_set(multiclass_scores,
self.assertAllEqual(proposals_out['num_detections'], expected_multiclass_scores))
expected_num_proposals)
self.assertAllClose(proposals_out['raw_detection_boxes'], self.assertAllClose(proposals_out['raw_detection_boxes'],
expected_raw_proposal_boxes) expected_raw_proposal_boxes)
self.assertAllClose(proposals_out['raw_detection_scores'], self.assertAllClose(proposals_out['raw_detection_scores'],
expected_raw_scores) expected_raw_scores)
@parameterized.parameters( @parameterized.named_parameters({
{'use_keras': True}, 'testcase_name': 'keras',
{'use_keras': False} 'use_keras': True
) }, {
'testcase_name': 'slim',
'use_keras': False
})
def test_postprocess_first_stage_only_train_mode(self, use_keras=False): def test_postprocess_first_stage_only_train_mode(self, use_keras=False):
self._test_postprocess_first_stage_only_train_mode(use_keras=use_keras) self._test_postprocess_first_stage_only_train_mode(use_keras=use_keras)
...@@ -1066,7 +1081,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1066,7 +1081,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
return (detections['num_detections'], detections['detection_boxes'], return (detections['num_detections'], detections['detection_boxes'],
detections['detection_scores'], detections['detection_classes'], detections['detection_scores'], detections['detection_classes'],
detections['raw_detection_boxes'], detections['raw_detection_boxes'],
detections['raw_detection_scores']) detections['raw_detection_scores'],
detections['detection_multiclass_scores'])
proposal_boxes = np.array( proposal_boxes = np.array(
[[[1, 1, 2, 3], [[[1, 1, 2, 3],
...@@ -1097,6 +1113,17 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1097,6 +1113,17 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
expected_num_detections = [5, 4] expected_num_detections = [5, 4]
expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]] expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]
expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]] expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]
expected_multiclass_scores = [[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]],
[[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[0, 0, 0]]]
h = float(image_shape[1]) h = float(image_shape[1])
w = float(image_shape[2]) w = float(image_shape[2])
expected_raw_detection_boxes = np.array( expected_raw_detection_boxes = np.array(
...@@ -1114,6 +1141,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1114,6 +1141,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
expected_detection_scores[indx][0:num_proposals]) expected_detection_scores[indx][0:num_proposals])
self.assertAllClose(results[3][indx][0:num_proposals], self.assertAllClose(results[3][indx][0:num_proposals],
expected_detection_classes[indx][0:num_proposals]) expected_detection_classes[indx][0:num_proposals])
self.assertAllClose(results[6][indx][0:num_proposals],
expected_multiclass_scores[indx][0:num_proposals])
self.assertAllClose(results[4], expected_raw_detection_boxes) self.assertAllClose(results[4], expected_raw_detection_boxes)
self.assertAllClose(results[5], self.assertAllClose(results[5],
...@@ -1895,8 +1924,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1895,8 +1924,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
number_of_stages=2, second_stage_batch_size=6) number_of_stages=2, second_stage_batch_size=6)
inputs_shape = (2, 20, 20, 3) inputs_shape = (2, 20, 20, 3)
inputs = tf.to_float(tf.random_uniform( inputs = tf.cast(tf.random_uniform(
inputs_shape, minval=0, maxval=255, dtype=tf.int32)) inputs_shape, minval=0, maxval=255, dtype=tf.int32), dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
...@@ -1921,8 +1950,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1921,8 +1950,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
is_training=False, use_keras=use_keras, is_training=False, use_keras=use_keras,
number_of_stages=2, second_stage_batch_size=6) number_of_stages=2, second_stage_batch_size=6)
inputs_shape = (2, 20, 20, 3) inputs_shape = (2, 20, 20, 3)
inputs = tf.to_float(tf.random_uniform( inputs = tf.cast(tf.random_uniform(
inputs_shape, minval=0, maxval=255, dtype=tf.int32)) inputs_shape, minval=0, maxval=255, dtype=tf.int32), dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
...@@ -1942,8 +1971,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1942,8 +1971,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
second_stage_batch_size=6, num_classes=42) second_stage_batch_size=6, num_classes=42)
inputs_shape2 = (2, 20, 20, 3) inputs_shape2 = (2, 20, 20, 3)
inputs2 = tf.to_float(tf.random_uniform( inputs2 = tf.cast(tf.random_uniform(
inputs_shape2, minval=0, maxval=255, dtype=tf.int32)) inputs_shape2, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs2, true_image_shapes = model2.preprocess(inputs2) preprocessed_inputs2, true_image_shapes = model2.preprocess(inputs2)
prediction_dict2 = model2.predict(preprocessed_inputs2, true_image_shapes) prediction_dict2 = model2.predict(preprocessed_inputs2, true_image_shapes)
model2.postprocess(prediction_dict2, true_image_shapes) model2.postprocess(prediction_dict2, true_image_shapes)
...@@ -1974,8 +2004,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -1974,8 +2004,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
num_classes=42) num_classes=42)
inputs_shape = (2, 20, 20, 3) inputs_shape = (2, 20, 20, 3)
inputs = tf.to_float( inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32)) tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
......
...@@ -297,8 +297,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -297,8 +297,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
""" """
image_shape_2d = tf.tile(tf.expand_dims(image_shape[1:], 0), image_shape_2d = tf.tile(tf.expand_dims(image_shape[1:], 0),
[image_shape[0], 1]) [image_shape[0], 1])
proposal_boxes_normalized, _, num_proposals, _, _ = self._postprocess_rpn( (proposal_boxes_normalized, _, _, num_proposals, _,
rpn_box_encodings, rpn_objectness_predictions_with_background, _) = self._postprocess_rpn(rpn_box_encodings,
rpn_objectness_predictions_with_background,
anchors, image_shape_2d, true_image_shapes) anchors, image_shape_2d, true_image_shapes)
box_classifier_features = ( box_classifier_features = (
......
...@@ -509,9 +509,9 @@ class SSDMetaArch(model.DetectionModel): ...@@ -509,9 +509,9 @@ class SSDMetaArch(model.DetectionModel):
resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape( resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape(
preprocessed_images) preprocessed_images)
true_heights, true_widths, _ = tf.unstack( true_heights, true_widths, _ = tf.unstack(
tf.to_float(true_image_shapes), axis=1) tf.cast(true_image_shapes, dtype=tf.float32), axis=1)
padded_height = tf.to_float(resized_inputs_shape[1]) padded_height = tf.cast(resized_inputs_shape[1], dtype=tf.float32)
padded_width = tf.to_float(resized_inputs_shape[2]) padded_width = tf.cast(resized_inputs_shape[2], dtype=tf.float32)
return tf.stack( return tf.stack(
[ [
tf.zeros_like(true_heights), tf.zeros_like(true_heights),
...@@ -654,6 +654,9 @@ class SSDMetaArch(model.DetectionModel): ...@@ -654,6 +654,9 @@ class SSDMetaArch(model.DetectionModel):
detection boxes. detection boxes.
detection_scores: [batch, max_detections] tensor with scalar scores for detection_scores: [batch, max_detections] tensor with scalar scores for
post-processed detection boxes. post-processed detection boxes.
detection_multiclass_scores: [batch, max_detections,
num_classes_with_background] tensor with class score distribution for
post-processed detection boxes including background class if any.
detection_classes: [batch, max_detections] tensor with classes for detection_classes: [batch, max_detections] tensor with classes for
post-processed detection classes. post-processed detection classes.
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
...@@ -703,10 +706,13 @@ class SSDMetaArch(model.DetectionModel): ...@@ -703,10 +706,13 @@ class SSDMetaArch(model.DetectionModel):
feature_map_list.append(tf.reshape(feature_map, [batch_size, -1])) feature_map_list.append(tf.reshape(feature_map, [batch_size, -1]))
box_features = tf.concat(feature_map_list, 1) box_features = tf.concat(feature_map_list, 1)
box_features = tf.identity(box_features, 'raw_box_features') box_features = tf.identity(box_features, 'raw_box_features')
if detection_keypoints is not None:
additional_fields = { additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints} 'multiclass_scores': detection_scores_with_background
}
if detection_keypoints is not None:
detection_keypoints = tf.identity(
detection_keypoints, 'raw_keypoint_locations')
additional_fields[fields.BoxListFields.keypoints] = detection_keypoints
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections) = self._non_max_suppression_fn( nmsed_additional_fields, num_detections) = self._non_max_suppression_fn(
detection_boxes, detection_boxes,
...@@ -722,8 +728,10 @@ class SSDMetaArch(model.DetectionModel): ...@@ -722,8 +728,10 @@ class SSDMetaArch(model.DetectionModel):
nmsed_scores, nmsed_scores,
fields.DetectionResultFields.detection_classes: fields.DetectionResultFields.detection_classes:
nmsed_classes, nmsed_classes,
fields.DetectionResultFields.detection_multiclass_scores:
nmsed_additional_fields['multiclass_scores'],
fields.DetectionResultFields.num_detections: fields.DetectionResultFields.num_detections:
tf.to_float(num_detections), tf.cast(num_detections, dtype=tf.float32),
fields.DetectionResultFields.raw_detection_boxes: fields.DetectionResultFields.raw_detection_boxes:
tf.squeeze(detection_boxes, axis=2), tf.squeeze(detection_boxes, axis=2),
fields.DetectionResultFields.raw_detection_scores: fields.DetectionResultFields.raw_detection_scores:
...@@ -786,13 +794,13 @@ class SSDMetaArch(model.DetectionModel): ...@@ -786,13 +794,13 @@ class SSDMetaArch(model.DetectionModel):
if self._random_example_sampler: if self._random_example_sampler:
batch_cls_per_anchor_weights = tf.reduce_mean( batch_cls_per_anchor_weights = tf.reduce_mean(
batch_cls_weights, axis=-1) batch_cls_weights, axis=-1)
batch_sampled_indicator = tf.to_float( batch_sampled_indicator = tf.cast(
shape_utils.static_or_dynamic_map_fn( shape_utils.static_or_dynamic_map_fn(
self._minibatch_subsample_fn, self._minibatch_subsample_fn,
[batch_cls_targets, batch_cls_per_anchor_weights], [batch_cls_targets, batch_cls_per_anchor_weights],
dtype=tf.bool, dtype=tf.bool,
parallel_iterations=self._parallel_iterations, parallel_iterations=self._parallel_iterations,
back_prop=True)) back_prop=True), dtype=tf.float32)
batch_reg_weights = tf.multiply(batch_sampled_indicator, batch_reg_weights = tf.multiply(batch_sampled_indicator,
batch_reg_weights) batch_reg_weights)
batch_cls_weights = tf.multiply( batch_cls_weights = tf.multiply(
...@@ -868,7 +876,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -868,7 +876,8 @@ class SSDMetaArch(model.DetectionModel):
# Optionally normalize by number of positive matches # Optionally normalize by number of positive matches
normalizer = tf.constant(1.0, dtype=tf.float32) normalizer = tf.constant(1.0, dtype=tf.float32)
if self._normalize_loss_by_num_matches: if self._normalize_loss_by_num_matches:
normalizer = tf.maximum(tf.to_float(tf.reduce_sum(batch_reg_weights)), normalizer = tf.maximum(tf.cast(tf.reduce_sum(batch_reg_weights),
dtype=tf.float32),
1.0) 1.0)
localization_loss_normalizer = normalizer localization_loss_normalizer = normalizer
...@@ -883,8 +892,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -883,8 +892,8 @@ class SSDMetaArch(model.DetectionModel):
name='classification_loss') name='classification_loss')
loss_dict = { loss_dict = {
str(localization_loss.op.name): localization_loss, 'Loss/localization_loss': localization_loss,
str(classification_loss.op.name): classification_loss 'Loss/classification_loss': classification_loss
} }
...@@ -1025,17 +1034,35 @@ class SSDMetaArch(model.DetectionModel): ...@@ -1025,17 +1034,35 @@ class SSDMetaArch(model.DetectionModel):
with rows of the Match objects corresponding to groundtruth boxes with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors. and columns corresponding to anchors.
""" """
avg_num_gt_boxes = tf.reduce_mean(tf.to_float(tf.stack( avg_num_gt_boxes = tf.reduce_mean(
[tf.shape(x)[0] for x in groundtruth_boxes_list]))) tf.cast(
avg_num_matched_gt_boxes = tf.reduce_mean(tf.to_float(tf.stack( tf.stack([tf.shape(x)[0] for x in groundtruth_boxes_list]),
[match.num_matched_rows() for match in match_list]))) dtype=tf.float32))
avg_pos_anchors = tf.reduce_mean(tf.to_float(tf.stack( avg_num_matched_gt_boxes = tf.reduce_mean(
[match.num_matched_columns() for match in match_list]))) tf.cast(
avg_neg_anchors = tf.reduce_mean(tf.to_float(tf.stack( tf.stack([match.num_matched_rows() for match in match_list]),
[match.num_unmatched_columns() for match in match_list]))) dtype=tf.float32))
avg_ignored_anchors = tf.reduce_mean(tf.to_float(tf.stack( avg_pos_anchors = tf.reduce_mean(
[match.num_ignored_columns() for match in match_list]))) tf.cast(
tf.stack([match.num_matched_columns() for match in match_list]),
dtype=tf.float32))
avg_neg_anchors = tf.reduce_mean(
tf.cast(
tf.stack([match.num_unmatched_columns() for match in match_list]),
dtype=tf.float32))
avg_ignored_anchors = tf.reduce_mean(
tf.cast(
tf.stack([match.num_ignored_columns() for match in match_list]),
dtype=tf.float32))
# TODO(rathodv): Add a test for these summaries. # TODO(rathodv): Add a test for these summaries.
try:
# TODO(kaftan): Integrate these summaries into the v2 style loops
with tf.compat.v2.init_scope():
if tf.compat.v2.executing_eagerly():
return
except AttributeError:
pass
tf.summary.scalar('AvgNumGroundtruthBoxesPerImage', tf.summary.scalar('AvgNumGroundtruthBoxesPerImage',
avg_num_gt_boxes, avg_num_gt_boxes,
family='TargetAssignment') family='TargetAssignment')
......
...@@ -176,6 +176,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -176,6 +176,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
] ]
] # padding ] # padding
expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
expected_multiclass_scores = [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]]
expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]] expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
expected_num_detections = np.array([3, 3]) expected_num_detections = np.array([3, 3])
...@@ -198,6 +201,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -198,6 +201,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
detections = model.postprocess(prediction_dict, true_image_shapes) detections = model.postprocess(prediction_dict, true_image_shapes)
self.assertIn('detection_boxes', detections) self.assertIn('detection_boxes', detections)
self.assertIn('detection_scores', detections) self.assertIn('detection_scores', detections)
self.assertIn('detection_multiclass_scores', detections)
self.assertIn('detection_classes', detections) self.assertIn('detection_classes', detections)
self.assertIn('num_detections', detections) self.assertIn('num_detections', detections)
self.assertIn('raw_detection_boxes', detections) self.assertIn('raw_detection_boxes', detections)
...@@ -217,6 +221,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -217,6 +221,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
expected_boxes[image_idx])) expected_boxes[image_idx]))
self.assertAllClose(detections_out['detection_scores'], expected_scores) self.assertAllClose(detections_out['detection_scores'], expected_scores)
self.assertAllClose(detections_out['detection_classes'], expected_classes) self.assertAllClose(detections_out['detection_classes'], expected_classes)
self.assertAllClose(detections_out['detection_multiclass_scores'],
expected_multiclass_scores)
self.assertAllClose(detections_out['num_detections'], self.assertAllClose(detections_out['num_detections'],
expected_num_detections) expected_num_detections)
self.assertAllEqual(detections_out['raw_detection_boxes'], self.assertAllEqual(detections_out['raw_detection_boxes'],
...@@ -235,7 +241,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -235,7 +241,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
true_image_shapes) true_image_shapes)
detections = model.postprocess(prediction_dict, true_image_shapes) detections = model.postprocess(prediction_dict, true_image_shapes)
return (detections['detection_boxes'], detections['detection_scores'], return (detections['detection_boxes'], detections['detection_scores'],
detections['detection_classes'], detections['num_detections']) detections['detection_classes'], detections['num_detections'],
detections['detection_multiclass_scores'])
batch_size = 2 batch_size = 2
image_size = 2 image_size = 2
...@@ -257,11 +264,14 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -257,11 +264,14 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
] ]
] # padding ] # padding
expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]] expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]]
expected_multiclass_scores = [[[0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 0], [0, 0], [0, 0], [0, 0]]]
expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]] expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]]
expected_num_detections = np.array([3, 3]) expected_num_detections = np.array([3, 3])
(detection_boxes, detection_scores, detection_classes, (detection_boxes, detection_scores, detection_classes,
num_detections) = self.execute(graph_fn, [input_image]) num_detections, detection_multiclass_scores) = self.execute(graph_fn,
[input_image])
for image_idx in range(batch_size): for image_idx in range(batch_size):
self.assertTrue(test_utils.first_rows_close_as_set( self.assertTrue(test_utils.first_rows_close_as_set(
detection_boxes[image_idx][ detection_boxes[image_idx][
...@@ -270,6 +280,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -270,6 +280,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
self.assertAllClose( self.assertAllClose(
detection_scores[image_idx][0:expected_num_detections[image_idx]], detection_scores[image_idx][0:expected_num_detections[image_idx]],
expected_scores[image_idx][0:expected_num_detections[image_idx]]) expected_scores[image_idx][0:expected_num_detections[image_idx]])
self.assertAllClose(
detection_multiclass_scores[image_idx]
[0:expected_num_detections[image_idx]],
expected_multiclass_scores[image_idx]
[0:expected_num_detections[image_idx]])
self.assertAllClose( self.assertAllClose(
detection_classes[image_idx][0:expected_num_detections[image_idx]], detection_classes[image_idx][0:expected_num_detections[image_idx]],
expected_classes[image_idx][0:expected_num_detections[image_idx]]) expected_classes[image_idx][0:expected_num_detections[image_idx]])
...@@ -600,8 +615,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -600,8 +615,8 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
with test_graph_detection.as_default(): with test_graph_detection.as_default():
model, _, _, _ = self._create_model(use_keras=use_keras) model, _, _, _ = self._create_model(use_keras=use_keras)
inputs_shape = [2, 2, 2, 3] inputs_shape = [2, 2, 2, 3]
inputs = tf.to_float(tf.random_uniform( inputs = tf.cast(tf.random_uniform(
inputs_shape, minval=0, maxval=255, dtype=tf.int32)) inputs_shape, minval=0, maxval=255, dtype=tf.int32), dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
...@@ -620,8 +635,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, ...@@ -620,8 +635,9 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
with test_graph_detection.as_default(): with test_graph_detection.as_default():
model, _, _, _ = self._create_model(use_keras=use_keras) model, _, _, _ = self._create_model(use_keras=use_keras)
inputs_shape = [2, 2, 2, 3] inputs_shape = [2, 2, 2, 3]
inputs = tf.to_float( inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32)) tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs) preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
model.postprocess(prediction_dict, true_image_shapes) model.postprocess(prediction_dict, true_image_shapes)
......
...@@ -98,13 +98,16 @@ def expected_calibration_error(y_true, y_pred, nbins=20): ...@@ -98,13 +98,16 @@ def expected_calibration_error(y_true, y_pred, nbins=20):
with tf.control_dependencies([bin_ids]): with tf.control_dependencies([bin_ids]):
update_bin_counts_op = tf.assign_add( update_bin_counts_op = tf.assign_add(
bin_counts, tf.to_float(tf.bincount(bin_ids, minlength=nbins))) bin_counts, tf.cast(tf.bincount(bin_ids, minlength=nbins),
dtype=tf.float32))
update_bin_true_sum_op = tf.assign_add( update_bin_true_sum_op = tf.assign_add(
bin_true_sum, bin_true_sum,
tf.to_float(tf.bincount(bin_ids, weights=y_true, minlength=nbins))) tf.cast(tf.bincount(bin_ids, weights=y_true, minlength=nbins),
dtype=tf.float32))
update_bin_preds_sum_op = tf.assign_add( update_bin_preds_sum_op = tf.assign_add(
bin_preds_sum, bin_preds_sum,
tf.to_float(tf.bincount(bin_ids, weights=y_pred, minlength=nbins))) tf.cast(tf.bincount(bin_ids, weights=y_pred, minlength=nbins),
dtype=tf.float32))
ece_update_op = _ece_from_bins( ece_update_op = _ece_from_bins(
update_bin_counts_op, update_bin_counts_op,
......
...@@ -216,29 +216,23 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -216,29 +216,23 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
for key, value in iter(box_metrics.items())} for key, value in iter(box_metrics.items())}
return box_metrics return box_metrics
def get_estimator_eval_metric_ops(self, eval_dict): def add_eval_dict(self, eval_dict):
"""Returns a dictionary of eval metric ops. """Observes an evaluation result dict for a single example.
Note that once value_op is called, the detections and groundtruth added via When executing eagerly, once all observations have been observed by this
update_op are cleared. method you can use `.evaluate()` to get the final metrics.
This function can take in groundtruth and detections for a batch of images, When using `tf.estimator.Estimator` for evaluation this function is used by
or for a single image. For the latter case, the batch dimension for input `get_estimator_eval_metric_ops()` to construct the metric update op.
tensors need not be present.
Args: Args:
eval_dict: A dictionary that holds tensors for evaluating object detection eval_dict: A dictionary that holds tensors for evaluating an object
performance. For single-image evaluation, this dictionary may be detection model, returned from
produced from eval_util.result_dict_for_single_example(). If multi-image eval_util.result_dict_for_single_example().
evaluation, `eval_dict` should contain the fields
'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
properly unpad the tensors from the batch.
Returns: Returns:
a dictionary of metric names to tuple of value_op and update_op that can None when executing eagerly, or an update_op that can be used to update
be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all the eval metrics in `tf.estimator.EstimatorSpec`.
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
""" """
def update_op( def update_op(
image_id_batched, image_id_batched,
...@@ -328,7 +322,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -328,7 +322,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
if is_annotated is None: if is_annotated is None:
is_annotated = tf.ones_like(image_id, dtype=tf.bool) is_annotated = tf.ones_like(image_id, dtype=tf.bool)
update_op = tf.py_func(update_op, [image_id, return tf.py_func(update_op, [image_id,
groundtruth_boxes, groundtruth_boxes,
groundtruth_classes, groundtruth_classes,
groundtruth_is_crowd, groundtruth_is_crowd,
...@@ -338,6 +332,32 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -338,6 +332,32 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_classes, detection_classes,
num_det_boxes_per_image, num_det_boxes_per_image,
is_annotated], []) is_annotated], [])
def get_estimator_eval_metric_ops(self, eval_dict):
"""Returns a dictionary of eval metric ops.
Note that once value_op is called, the detections and groundtruth added via
update_op are cleared.
This function can take in groundtruth and detections for a batch of images,
or for a single image. For the latter case, the batch dimension for input
tensors need not be present.
Args:
eval_dict: A dictionary that holds tensors for evaluating object detection
performance. For single-image evaluation, this dictionary may be
produced from eval_util.result_dict_for_single_example(). If multi-image
evaluation, `eval_dict` should contain the fields
'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
properly unpad the tensors from the batch.
Returns:
a dictionary of metric names to tuple of value_op and update_op that can
be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
"""
update_op = self.add_eval_dict(eval_dict)
metric_names = ['DetectionBoxes_Precision/mAP', metric_names = ['DetectionBoxes_Precision/mAP',
'DetectionBoxes_Precision/mAP@.50IOU', 'DetectionBoxes_Precision/mAP@.50IOU',
'DetectionBoxes_Precision/mAP@.75IOU', 'DetectionBoxes_Precision/mAP@.75IOU',
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
# ============================================================================== # ==============================================================================
r"""Runs evaluation using OpenImages groundtruth and predictions. r"""Runs evaluation using OpenImages groundtruth and predictions.
Uses the Open Images Challenge 2018 and 2019 metrics.
Example usage: Example usage:
python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \ python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \
--input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \ --input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \
...@@ -21,27 +23,50 @@ python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \ ...@@ -21,27 +23,50 @@ python models/research/object_detection/metrics/oid_od_challenge_evaluation.py \
--input_class_labelmap=/path/to/input/class_labelmap.pbtxt \ --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
--input_predictions=/path/to/input/predictions.csv \ --input_predictions=/path/to/input/predictions.csv \
--output_metrics=/path/to/output/metric.csv \ --output_metrics=/path/to/output/metric.csv \
--input_annotations_segm=[/path/to/input/annotations-human-mask.csv] \
If the optional flag --input_annotations_segm is provided, a Mask column is also expected in the CSV files.
CSVs with bounding box annotations and image label (including the image URLs) CSVs with bounding box annotations, instance segmentations and image label
can be downloaded from the Open Images Challenge website: can be downloaded from the Open Images Challenge website:
https://storage.googleapis.com/openimages/web/challenge.html https://storage.googleapis.com/openimages/web/challenge.html
The format of the input csv and the metrics itself are described on the The format of the input csv and the metrics itself are described on the
challenge website. challenge website as well.
""" """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse from absl import app
from absl import flags
import pandas as pd import pandas as pd
from google.protobuf import text_format from google.protobuf import text_format
from object_detection.metrics import io_utils from object_detection.metrics import io_utils
from object_detection.metrics import oid_od_challenge_evaluation_utils as utils from object_detection.metrics import oid_challenge_evaluation_utils as utils
from object_detection.protos import string_int_label_map_pb2 from object_detection.protos import string_int_label_map_pb2
from object_detection.utils import object_detection_evaluation from object_detection.utils import object_detection_evaluation
# Command-line interface. All flags default to None; every flag except
# input_annotations_segm is marked required in main().
flags.DEFINE_string('input_annotations_boxes', None,
                    'File with groundtruth boxes annotations.')
flags.DEFINE_string('input_annotations_labels', None,
                    'File with groundtruth labels annotations.')
flags.DEFINE_string(
    'input_predictions', None,
    """File with detection predictions; NOTE: no postprocessing is applied in the evaluation script."""
)
flags.DEFINE_string('input_class_labelmap', None,
                    'Open Images Challenge labelmap.')
flags.DEFINE_string('output_metrics', None, 'Output file with csv metrics.')
flags.DEFINE_string(
    'input_annotations_segm', None,
    'File with groundtruth instance segmentation annotations [OPTIONAL].')

FLAGS = flags.FLAGS
def _load_labelmap(labelmap_path): def _load_labelmap(labelmap_path):
"""Loads labelmap from the labelmap path. """Loads labelmap from the labelmap path.
...@@ -66,26 +91,43 @@ def _load_labelmap(labelmap_path): ...@@ -66,26 +91,43 @@ def _load_labelmap(labelmap_path):
return labelmap_dict, categories return labelmap_dict, categories
def main(parsed_args): def main(unused_argv):
all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes) flags.mark_flag_as_required('input_annotations_boxes')
all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels) flags.mark_flag_as_required('input_annotations_labels')
flags.mark_flag_as_required('input_predictions')
flags.mark_flag_as_required('input_class_labelmap')
flags.mark_flag_as_required('output_metrics')
all_location_annotations = pd.read_csv(FLAGS.input_annotations_boxes)
all_label_annotations = pd.read_csv(FLAGS.input_annotations_labels)
all_label_annotations.rename( all_label_annotations.rename(
columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True) columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
all_annotations = pd.concat([all_box_annotations, all_label_annotations])
class_label_map, categories = _load_labelmap(parsed_args.input_class_labelmap) is_instance_segmentation_eval = False
if FLAGS.input_annotations_segm:
is_instance_segmentation_eval = True
all_segm_annotations = pd.read_csv(FLAGS.input_annotations_segm)
# Note: this part is unstable as it requires the float point numbers in both
# csvs are exactly the same;
# Will be replaced by more stable solution: merge on LabelName and ImageID
# and filter down by IoU.
all_location_annotations = utils.merge_boxes_and_masks(
all_location_annotations, all_segm_annotations)
all_annotations = pd.concat([all_location_annotations, all_label_annotations])
class_label_map, categories = _load_labelmap(FLAGS.input_class_labelmap)
challenge_evaluator = ( challenge_evaluator = (
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator( object_detection_evaluation.OpenImagesChallengeEvaluator(
categories)) categories, evaluate_masks=is_instance_segmentation_eval))
for _, groundtruth in enumerate(all_annotations.groupby('ImageID')): for _, groundtruth in enumerate(all_annotations.groupby('ImageID')):
image_id, image_groundtruth = groundtruth image_id, image_groundtruth = groundtruth
groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary( groundtruth_dictionary = utils.build_groundtruth_dictionary(
image_groundtruth, class_label_map) image_groundtruth, class_label_map)
challenge_evaluator.add_single_ground_truth_image_info( challenge_evaluator.add_single_ground_truth_image_info(
image_id, groundtruth_dictionary) image_id, groundtruth_dictionary)
all_predictions = pd.read_csv(parsed_args.input_predictions) all_predictions = pd.read_csv(FLAGS.input_predictions)
for _, prediction_data in enumerate(all_predictions.groupby('ImageID')): for _, prediction_data in enumerate(all_predictions.groupby('ImageID')):
image_id, image_predictions = prediction_data image_id, image_predictions = prediction_data
prediction_dictionary = utils.build_predictions_dictionary( prediction_dictionary = utils.build_predictions_dictionary(
...@@ -95,34 +137,9 @@ def main(parsed_args): ...@@ -95,34 +137,9 @@ def main(parsed_args):
metrics = challenge_evaluator.evaluate() metrics = challenge_evaluator.evaluate()
with open(parsed_args.output_metrics, 'w') as fid: with open(FLAGS.output_metrics, 'w') as fid:
io_utils.write_csv(fid, metrics) io_utils.write_csv(fid, metrics)
if __name__ == '__main__': if __name__ == '__main__':
app.run(main)
parser = argparse.ArgumentParser(
description='Evaluate Open Images Object Detection Challenge predictions.'
)
parser.add_argument(
'--input_annotations_boxes',
required=True,
help='File with groundtruth boxes annotations.')
parser.add_argument(
'--input_annotations_labels',
required=True,
help='File with groundtruth labels annotations')
parser.add_argument(
'--input_predictions',
required=True,
help="""File with detection predictions; NOTE: no postprocessing is
applied in the evaluation script.""")
parser.add_argument(
'--input_class_labelmap',
required=True,
help='Open Images Challenge labelmap.')
parser.add_argument(
'--output_metrics', required=True, help='Output file with csv metrics')
args = parser.parse_args()
main(args)
...@@ -12,17 +12,92 @@ ...@@ -12,17 +12,92 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format. r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format."""
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
import pandas as pd
from pycocotools import mask
from object_detection.core import standard_fields from object_detection.core import standard_fields
def build_groundtruth_boxes_dictionary(data, class_label_map): def _to_normalized_box(mask_np):
"""Decodes binary segmentation masks into np.arrays and boxes.
Args:
mask_np: np.ndarray of size NxWxH.
Returns:
a np.ndarray of the size Nx4, each row containing normalized coordinates
[YMin, XMin, YMax, XMax] of a box computed of axis parallel enclosing box of
a mask.
"""
coord1, coord2 = np.nonzero(mask_np)
if coord1.size > 0:
ymin = float(min(coord1)) / mask_np.shape[0]
ymax = float(max(coord1) + 1) / mask_np.shape[0]
xmin = float(min(coord2)) / mask_np.shape[1]
xmax = float((max(coord2) + 1)) / mask_np.shape[1]
return np.array([ymin, xmin, ymax, xmax])
else:
return np.array([0.0, 0.0, 0.0, 0.0])
def _decode_raw_data_into_masks_and_boxes(segments, image_widths,
image_heights):
"""Decods binary segmentation masks into np.arrays and boxes.
Args:
segments: pandas Series object containing either None entries or strings
with COCO-encoded binary masks. All masks are expected to be the same size.
image_widths: pandas Series of mask widths.
image_heights: pandas Series of mask heights.
Returns:
a np.ndarray of the size NxWxH, where W and H is determined from the encoded
masks; for the None values, zero arrays of size WxH are created. if input
contains only None values, W=1, H=1.
"""
segment_masks = []
segment_boxes = []
ind = segments.first_valid_index()
if ind is not None:
size = [int(image_heights.iloc[ind]), int(image_widths[ind])]
else:
# It does not matter which size we pick since no masks will ever be
# evaluated.
size = [1, 1]
for segment, im_width, im_height in zip(segments, image_widths,
image_heights):
if pd.isnull(segment):
segment_masks.append(np.zeros([1, size[0], size[1]], dtype=np.uint8))
segment_boxes.append(np.expand_dims(np.array([0.0, 0.0, 0.0, 0.0]), 0))
else:
encoding_dict = {'size': [im_height, im_width], 'counts': segment}
mask_tensor = mask.decode(encoding_dict)
segment_masks.append(np.expand_dims(mask_tensor, 0))
segment_boxes.append(np.expand_dims(_to_normalized_box(mask_tensor), 0))
return np.concatenate(
segment_masks, axis=0), np.concatenate(
segment_boxes, axis=0)
def merge_boxes_and_masks(box_data, mask_data):
  """Outer-joins box annotations with mask annotations.

  Rows are matched on image, label and exact box coordinates; unmatched rows
  from either side are kept (outer join), with missing columns left as NaN.

  Args:
    box_data: pandas DataFrame with bounding-box annotation rows.
    mask_data: pandas DataFrame with instance-mask annotation rows.

  Returns:
    pandas DataFrame with the merged annotations.
  """
  join_keys = ['LabelName', 'ImageID', 'XMin', 'XMax', 'YMin', 'YMax',
               'IsGroupOf']
  return box_data.merge(mask_data, how='outer', on=join_keys)
def build_groundtruth_dictionary(data, class_label_map):
"""Builds a groundtruth dictionary from groundtruth data in CSV file. """Builds a groundtruth dictionary from groundtruth data in CSV file.
Args: Args:
...@@ -44,21 +119,31 @@ def build_groundtruth_boxes_dictionary(data, class_label_map): ...@@ -44,21 +119,31 @@ def build_groundtruth_boxes_dictionary(data, class_label_map):
M numpy boolean array denoting whether a groundtruth box contains a M numpy boolean array denoting whether a groundtruth box contains a
group of instances. group of instances.
""" """
data_boxes = data[data.ConfidenceImageLabel.isnull()] data_location = data[data.XMin.notnull()]
data_labels = data[data.XMin.isnull()] data_labels = data[data.ConfidenceImageLabel.notnull()]
return { dictionary = {
standard_fields.InputDataFields.groundtruth_boxes: standard_fields.InputDataFields.groundtruth_boxes:
data_boxes[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(), data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
standard_fields.InputDataFields.groundtruth_classes: standard_fields.InputDataFields.groundtruth_classes:
data_boxes['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), data_location['LabelName'].map(lambda x: class_label_map[x]
).as_matrix(),
standard_fields.InputDataFields.groundtruth_group_of: standard_fields.InputDataFields.groundtruth_group_of:
data_boxes['IsGroupOf'].as_matrix().astype(int), data_location['IsGroupOf'].as_matrix().astype(int),
standard_fields.InputDataFields.groundtruth_image_classes: standard_fields.InputDataFields.groundtruth_image_classes:
data_labels['LabelName'].map(lambda x: class_label_map[x]) data_labels['LabelName'].map(lambda x: class_label_map[x]
.as_matrix(), ).as_matrix(),
} }
if 'Mask' in data_location:
segments, _ = _decode_raw_data_into_masks_and_boxes(
data_location['Mask'], data_location['ImageWidth'],
data_location['ImageHeight'])
dictionary[
standard_fields.InputDataFields.groundtruth_instance_masks] = segments
return dictionary
def build_predictions_dictionary(data, class_label_map): def build_predictions_dictionary(data, class_label_map):
"""Builds a predictions dictionary from predictions data in CSV file. """Builds a predictions dictionary from predictions data in CSV file.
...@@ -80,11 +165,21 @@ def build_predictions_dictionary(data, class_label_map): ...@@ -80,11 +165,21 @@ def build_predictions_dictionary(data, class_label_map):
the boxes. the boxes.
""" """
return { dictionary = {
standard_fields.DetectionResultFields.detection_boxes:
data[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
standard_fields.DetectionResultFields.detection_classes: standard_fields.DetectionResultFields.detection_classes:
data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(), data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(),
standard_fields.DetectionResultFields.detection_scores: standard_fields.DetectionResultFields.detection_scores:
data['Score'].as_matrix() data['Score'].as_matrix()
} }
if 'Mask' in data:
segments, boxes = _decode_raw_data_into_masks_and_boxes(
data['Mask'], data['ImageWidth'], data['ImageHeight'])
dictionary[standard_fields.DetectionResultFields.detection_masks] = segments
dictionary[standard_fields.DetectionResultFields.detection_boxes] = boxes
else:
dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
'YMin', 'XMin', 'YMax', 'XMax'
]].as_matrix()
return dictionary
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for oid_od_challenge_evaluation_util."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
from pycocotools import mask
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.metrics import oid_challenge_evaluation_utils as utils
class OidUtilTest(tf.test.TestCase):
  """Unit tests for the low-level mask helpers."""

  def testMaskToNormalizedBox(self):
    # Each case pairs a binary mask with its expected normalized
    # [YMin, XMin, YMax, XMax] axis-aligned enclosing box.
    cases = [
        (np.array([[0, 0, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 0, 0, 0]]),
         np.array([0.25, 0.25, 0.75, 0.5])),
        (np.array([[0, 0, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1], [0, 1, 1, 1]]),
         np.array([0.25, 0.25, 1.0, 1.0])),
        # An all-zero mask maps to the degenerate all-zero box.
        (np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
         np.array([0.0, 0.0, 0.0, 0.0])),
    ]
    for mask_np, expected_box in cases:
      self.assertAllEqual(expected_box, utils._to_normalized_box(mask_np))

  def testDecodeToTensors(self):
    # One mask with foreground pixels and one empty mask.
    foreground = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0]],
                          dtype=np.uint8)
    background = np.zeros((3, 4), dtype=np.uint8)
    encodings = [
        mask.encode(np.asfortranarray(m)) for m in (foreground, background)
    ]
    counts = pd.Series([e['counts'] for e in encodings])
    widths = pd.Series([foreground.shape[1], background.shape[1]])
    heights = pd.Series([foreground.shape[0], background.shape[0]])

    segm, bbox = utils._decode_raw_data_into_masks_and_boxes(
        counts, widths, heights)

    self.assertAllEqual(np.stack([foreground, background]), segm)
    # Empty masks produce an all-zero normalized box.
    self.assertAllEqual(
        np.array([[0.0, 0.5, 2.0 / 3.0, 1.0], [0, 0, 0, 0]]), bbox)
class OidChallengeEvaluationUtilTest(tf.test.TestCase):
  """Tests for the CSV -> OpenImagesChallengeEvaluator input converters."""

  def testBuildGroundtruthDictionaryBoxes(self):
    # Rows with box coordinates are box groundtruth; rows with only
    # ConfidenceImageLabel set are image-level label groundtruth.
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None],
         ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1],
         ['fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 0],
         ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf',
            'ConfidenceImageLabel'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    groundtruth_dictionary = utils.build_groundtruth_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.InputDataFields.groundtruth_boxes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_group_of,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes,
                  groundtruth_dictionary)

    self.assertAllEqual(
        np.array([1, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_classes])
    self.assertAllEqual(
        np.array([1, 0]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_group_of])

    # Boxes come back reordered to [YMin, XMin, YMax, XMax].
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]])
    self.assertNDArrayNear(
        expected_boxes_data, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_boxes], 1e-5)
    self.assertAllEqual(
        np.array([1, 2, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_image_classes])

  def testBuildPredictionDictionaryBoxes(self):
    # Box-only predictions: no Mask column present in the CSV data.
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2],
         ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    prediction_dictionary = utils.build_predictions_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.DetectionResultFields.detection_boxes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_classes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_scores,
                  prediction_dictionary)

    self.assertAllEqual(
        np.array([1, 3, 1]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_classes])
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2],
                                    [0.2, 0.0, 0.3, 0.1]])
    self.assertNDArrayNear(
        expected_boxes_data, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_boxes], 1e-5)
    self.assertNDArrayNear(
        np.array([0.1, 0.2, 0.3]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_scores], 1e-5)

  def testBuildGroundtruthDictionaryMasks(self):
    # Mixes: a row with a real mask, a box-only row (no mask — should get a
    # zero-mask placeholder), a row with an empty mask, and image-label rows.
    mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    encoding1 = mask.encode(np.asfortranarray(mask1))
    encoding2 = mask.encode(np.asfortranarray(mask2))
    np_data = pd.DataFrame(
        [[
            'fe58ec1b06db2bb7', mask1.shape[1], mask1.shape[0], '/m/04bcr3',
            0.0, 0.3, 0.5, 0.6, 0, None, encoding1['counts']
        ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/02gy9n', 0.1, 0.2, 0.3, 0.4,
             1, None, None
         ],
         [
             'fe58ec1b06db2bb7', mask2.shape[1], mask2.shape[0], '/m/02gy9n',
             0.5, 0.6, 0.8, 0.9, 0, None, encoding2['counts']
         ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/04bcr3', None, None, None,
             None, None, 1, None
         ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/083vt', None, None, None,
             None, None, 0, None
         ],
         [
             'fe58ec1b06db2bb7', None, None, '/m/02gy9n', None, None, None,
             None, None, 1, None
         ]],
        columns=[
            'ImageID', 'ImageWidth', 'ImageHeight', 'LabelName', 'XMin',
            'XMax', 'YMin', 'YMax', 'IsGroupOf', 'ConfidenceImageLabel', 'Mask'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    groundtruth_dictionary = utils.build_groundtruth_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.InputDataFields.groundtruth_boxes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_group_of,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_instance_masks,
                  groundtruth_dictionary)

    self.assertAllEqual(
        np.array([1, 3, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_classes])
    self.assertAllEqual(
        np.array([0, 1, 0]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_group_of])

    # Groundtruth boxes keep the CSV coordinates (not derived from masks).
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2],
                                    [0.8, 0.5, 0.9, 0.6]])
    self.assertNDArrayNear(
        expected_boxes_data, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_boxes], 1e-5)
    self.assertAllEqual(
        np.array([1, 2, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_image_classes])

    # The mask-less box row yields an all-zero 4x4 placeholder mask.
    expected_segm = np.concatenate([
        np.expand_dims(mask1, 0),
        np.zeros((1, 4, 4), dtype=np.uint8),
        np.expand_dims(mask2, 0)
    ],
                                   axis=0)
    self.assertAllEqual(
        expected_segm, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_instance_masks])

  def testBuildPredictionDictionaryMasks(self):
    # For mask predictions, detection boxes are *computed* from the masks
    # rather than read from the CSV.
    mask1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    mask2 = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
                     dtype=np.uint8)
    encoding1 = mask.encode(np.asfortranarray(mask1))
    encoding2 = mask.encode(np.asfortranarray(mask2))
    np_data = pd.DataFrame(
        [[
            'fe58ec1b06db2bb7', mask1.shape[1], mask1.shape[0], '/m/04bcr3',
            encoding1['counts'], 0.8
        ],
         [
             'fe58ec1b06db2bb7', mask2.shape[1], mask2.shape[0], '/m/02gy9n',
             encoding2['counts'], 0.6
         ]],
        columns=[
            'ImageID', 'ImageWidth', 'ImageHeight', 'LabelName', 'Mask',
            'Score'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/02gy9n': 3}
    prediction_dictionary = utils.build_predictions_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.DetectionResultFields.detection_boxes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_classes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_scores,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_masks,
                  prediction_dictionary)

    self.assertAllEqual(
        np.array([1, 3]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_classes])
    # Box for the empty mask collapses to [0, 0, 0, 0].
    expected_boxes_data = np.array([[0.0, 0.5, 0.5, 1.0], [0, 0, 0, 0]])
    self.assertNDArrayNear(
        expected_boxes_data, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_boxes], 1e-5)
    self.assertNDArrayNear(
        np.array([0.8, 0.6]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_scores], 1e-5)
    expected_segm = np.concatenate(
        [np.expand_dims(mask1, 0),
         np.expand_dims(mask2, 0)], axis=0)
    self.assertAllEqual(
        expected_segm, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_masks])
if __name__ == '__main__':
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for oid_od_challenge_evaluation_util."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.metrics import oid_od_challenge_evaluation_utils as utils
class OidOdChallengeEvaluationUtilTest(tf.test.TestCase):
  """Tests for the box-only CSV -> evaluator input converters."""

  def testBuildGroundtruthDictionary(self):
    # Rows with box coordinates are box groundtruth; rows with only
    # ConfidenceImageLabel set are image-level label groundtruth.
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 1, None],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0, None],
         ['fe58ec1b06db2bb7', '/m/04bcr3', None, None, None, None, None, 1],
         ['fe58ec1b06db2bb7', '/m/083vt', None, None, None, None, None, 0],
         ['fe58ec1b06db2bb7', '/m/02gy9n', None, None, None, None, None, 1]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf',
            'ConfidenceImageLabel'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    groundtruth_dictionary = utils.build_groundtruth_boxes_dictionary(
        np_data, class_label_map)

    # assertIn gives clearer failure messages than assertTrue(x in y) and
    # matches the style of the other OID evaluation-util tests.
    self.assertIn(standard_fields.InputDataFields.groundtruth_boxes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_classes,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_group_of,
                  groundtruth_dictionary)
    self.assertIn(standard_fields.InputDataFields.groundtruth_image_classes,
                  groundtruth_dictionary)

    self.assertAllEqual(
        np.array([1, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_classes])
    self.assertAllEqual(
        np.array([1, 0]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_group_of])

    # Boxes come back reordered to [YMin, XMin, YMax, XMax].
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2]])
    self.assertNDArrayNear(
        expected_boxes_data, groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_boxes], 1e-5)
    self.assertAllEqual(
        np.array([1, 2, 3]), groundtruth_dictionary[
            standard_fields.InputDataFields.groundtruth_image_classes])

  def testBuildPredictionDictionary(self):
    np_data = pd.DataFrame(
        [['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.3, 0.5, 0.6, 0.1],
         ['fe58ec1b06db2bb7', '/m/02gy9n', 0.1, 0.2, 0.3, 0.4, 0.2],
         ['fe58ec1b06db2bb7', '/m/04bcr3', 0.0, 0.1, 0.2, 0.3, 0.3]],
        columns=[
            'ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax', 'Score'
        ])
    class_label_map = {'/m/04bcr3': 1, '/m/083vt': 2, '/m/02gy9n': 3}
    prediction_dictionary = utils.build_predictions_dictionary(
        np_data, class_label_map)

    self.assertIn(standard_fields.DetectionResultFields.detection_boxes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_classes,
                  prediction_dictionary)
    self.assertIn(standard_fields.DetectionResultFields.detection_scores,
                  prediction_dictionary)

    self.assertAllEqual(
        np.array([1, 3, 1]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_classes])
    expected_boxes_data = np.array([[0.5, 0.0, 0.6, 0.3], [0.3, 0.1, 0.4, 0.2],
                                    [0.2, 0.0, 0.3, 0.1]])
    self.assertNDArrayNear(
        expected_boxes_data, prediction_dictionary[
            standard_fields.DetectionResultFields.detection_boxes], 1e-5)
    self.assertNDArrayNear(
        np.array([0.1, 0.2, 0.3]), prediction_dictionary[
            standard_fields.DetectionResultFields.detection_scores], 1e-5)
if __name__ == '__main__':
tf.test.main()
...@@ -17,7 +17,7 @@ r"""Runs evaluation using OpenImages groundtruth and predictions. ...@@ -17,7 +17,7 @@ r"""Runs evaluation using OpenImages groundtruth and predictions.
Example usage: Example usage:
python \ python \
models/research/object_detection/metrics/oid_vrd_challenge_evaluation.py \ models/research/object_detection/metrics/oid_vrd_challenge_evaluation.py \
--input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \ --input_annotations_vrd=/path/to/input/annotations-human-bbox.csv \
--input_annotations_labels=/path/to/input/annotations-label.csv \ --input_annotations_labels=/path/to/input/annotations-label.csv \
--input_class_labelmap=/path/to/input/class_labelmap.pbtxt \ --input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
--input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \ --input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \
...@@ -126,7 +126,7 @@ if __name__ == '__main__': ...@@ -126,7 +126,7 @@ if __name__ == '__main__':
description= description=
'Evaluate Open Images Visual Relationship Detection predictions.') 'Evaluate Open Images Visual Relationship Detection predictions.')
parser.add_argument( parser.add_argument(
'--input_annotations_boxes', '--input_annotations_vrd',
required=True, required=True,
help='File with groundtruth vrd annotations.') help='File with groundtruth vrd annotations.')
parser.add_argument( parser.add_argument(
......
...@@ -187,6 +187,46 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): ...@@ -187,6 +187,46 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
return unbatched_tensor_dict return unbatched_tensor_dict
def _provide_groundtruth(model, labels):
  """Feeds the groundtruth in `labels` to a detection model.

  Reads the required boxes and classes plus the optional instance masks,
  keypoints, weights, confidences, and is_crowd fields out of the labels
  dict and forwards them to `model.provide_groundtruth`. Optional fields
  that are absent from `labels` are passed as None.

  Args:
    model: The detection model to provide groundtruth to.
    labels: The labels for the training or evaluation inputs, keyed by
      `fields.InputDataFields` names.
  """
  input_fields = fields.InputDataFields
  model.provide_groundtruth(
      groundtruth_boxes_list=labels[input_fields.groundtruth_boxes],
      groundtruth_classes_list=labels[input_fields.groundtruth_classes],
      # dict.get yields None for missing optional fields, matching the
      # model API's "not provided" convention.
      groundtruth_confidences_list=labels.get(
          input_fields.groundtruth_confidences),
      groundtruth_masks_list=labels.get(
          input_fields.groundtruth_instance_masks),
      groundtruth_keypoints_list=labels.get(
          input_fields.groundtruth_keypoints),
      groundtruth_weights_list=labels.get(input_fields.groundtruth_weights),
      groundtruth_is_crowd_list=labels.get(input_fields.groundtruth_is_crowd))
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
postprocess_on_cpu=False): postprocess_on_cpu=False):
"""Creates a model function for `Estimator`. """Creates a model function for `Estimator`.
...@@ -247,33 +287,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -247,33 +287,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] _provide_groundtruth(detection_model, labels)
gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
gt_masks_list = None
if fields.InputDataFields.groundtruth_instance_masks in labels:
gt_masks_list = labels[
fields.InputDataFields.groundtruth_instance_masks]
gt_keypoints_list = None
if fields.InputDataFields.groundtruth_keypoints in labels:
gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
gt_confidences_list = None
if fields.InputDataFields.groundtruth_confidences in labels:
gt_confidences_list = labels[
fields.InputDataFields.groundtruth_confidences]
gt_is_crowd_list = None
if fields.InputDataFields.groundtruth_is_crowd in labels:
gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
detection_model.provide_groundtruth(
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
groundtruth_confidences_list=gt_confidences_list,
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list)
preprocessed_images = features[fields.InputDataFields.image] preprocessed_images = features[fields.InputDataFields.image]
if use_tpu and train_config.use_bfloat16: if use_tpu and train_config.use_bfloat16:
......
...@@ -225,6 +225,9 @@ class _LayersOverride(object): ...@@ -225,6 +225,9 @@ class _LayersOverride(object):
placeholder_with_default = tf.placeholder_with_default( placeholder_with_default = tf.placeholder_with_default(
input=input_tensor, shape=[None] + shape) input=input_tensor, shape=[None] + shape)
if tf.executing_eagerly():
return tf.keras.layers.Input(shape=shape)
else:
return tf.keras.layers.Input(tensor=placeholder_with_default) return tf.keras.layers.Input(tensor=placeholder_with_default)
# pylint: disable=unused-argument # pylint: disable=unused-argument
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
import functools import functools
import tensorflow as tf import tensorflow as tf
from object_detection.core import box_predictor from object_detection.core import box_predictor
from object_detection.utils import shape_utils
from object_detection.utils import static_shape from object_detection.utils import static_shape
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -350,7 +351,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor): ...@@ -350,7 +351,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
'feature maps, found: {}'.format( 'feature maps, found: {}'.format(
num_predictions_per_location_list)) num_predictions_per_location_list))
feature_channels = [ feature_channels = [
image_feature.shape[3].value for image_feature in image_features shape_utils.get_dim_as_int(image_feature.shape[3])
for image_feature in image_features
] ]
has_different_feature_channels = len(set(feature_channels)) > 1 has_different_feature_channels = len(set(feature_channels)) > 1
if has_different_feature_channels: if has_different_feature_channels:
......
...@@ -19,6 +19,7 @@ import collections ...@@ -19,6 +19,7 @@ import collections
import tensorflow as tf import tensorflow as tf
from object_detection.core import box_predictor from object_detection.core import box_predictor
from object_detection.utils import shape_utils
from object_detection.utils import static_shape from object_detection.utils import static_shape
keras = tf.keras.layers keras = tf.keras.layers
...@@ -371,7 +372,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -371,7 +372,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
def build(self, input_shapes): def build(self, input_shapes):
"""Creates the variables of the layer.""" """Creates the variables of the layer."""
feature_channels = [ feature_channels = [
input_shape[3].value for input_shape in input_shapes shape_utils.get_dim_as_int(input_shape[3])
for input_shape in input_shapes
] ]
has_different_feature_channels = len(set(feature_channels)) > 1 has_different_feature_channels = len(set(feature_channels)) > 1
if has_different_feature_channels: if has_different_feature_channels:
......
...@@ -24,6 +24,7 @@ import tensorflow as tf ...@@ -24,6 +24,7 @@ import tensorflow as tf
from object_detection.predictors.heads import head from object_detection.predictors.heads import head
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
class ConvolutionalMaskHead(head.KerasHead): class ConvolutionalMaskHead(head.KerasHead):
...@@ -254,8 +255,10 @@ class MaskRCNNMaskHead(head.KerasHead): ...@@ -254,8 +255,10 @@ class MaskRCNNMaskHead(head.KerasHead):
if self._convolve_then_upsample: if self._convolve_then_upsample:
# Replace Transposed Convolution with a Nearest Neighbor upsampling step # Replace Transposed Convolution with a Nearest Neighbor upsampling step
# followed by 3x3 convolution. # followed by 3x3 convolution.
height_scale = self._mask_height / input_shapes[1].value height_scale = self._mask_height / shape_utils.get_dim_as_int(
width_scale = self._mask_width / input_shapes[2].value input_shapes[1])
width_scale = self._mask_width / shape_utils.get_dim_as_int(
input_shapes[2])
# pylint: disable=g-long-lambda # pylint: disable=g-long-lambda
self._mask_predictor_layers.append(tf.keras.layers.Lambda( self._mask_predictor_layers.append(tf.keras.layers.Lambda(
lambda features: ops.nearest_neighbor_upsampling( lambda features: ops.nearest_neighbor_upsampling(
......
...@@ -128,7 +128,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor): ...@@ -128,7 +128,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor):
crop_size=self._crop_size, crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True) global_pool=True)
box_encodings = tf.squeeze(box_encodings, squeeze_dims=[2, 3]) box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
box_encodings = tf.reshape(box_encodings, box_encodings = tf.reshape(box_encodings,
[batch_size * num_boxes, 1, self.num_classes, [batch_size * num_boxes, 1, self.num_classes,
self._box_code_size]) self._box_code_size])
...@@ -149,7 +149,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor): ...@@ -149,7 +149,7 @@ class RfcnBoxPredictor(box_predictor.BoxPredictor):
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True)) global_pool=True))
class_predictions_with_background = tf.squeeze( class_predictions_with_background = tf.squeeze(
class_predictions_with_background, squeeze_dims=[2, 3]) class_predictions_with_background, axis=[2, 3])
class_predictions_with_background = tf.reshape( class_predictions_with_background = tf.reshape(
class_predictions_with_background, class_predictions_with_background,
[batch_size * num_boxes, 1, total_classes]) [batch_size * num_boxes, 1, total_classes])
......
...@@ -176,7 +176,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -176,7 +176,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor):
crop_size=self._crop_size, crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True) global_pool=True)
box_encodings = tf.squeeze(box_encodings, squeeze_dims=[2, 3]) box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
box_encodings = tf.reshape(box_encodings, box_encodings = tf.reshape(box_encodings,
[batch_size * num_boxes, 1, self.num_classes, [batch_size * num_boxes, 1, self.num_classes,
self._box_code_size]) self._box_code_size])
...@@ -193,7 +193,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor): ...@@ -193,7 +193,7 @@ class RfcnKerasBoxPredictor(box_predictor.KerasBoxPredictor):
num_spatial_bins=self._num_spatial_bins, num_spatial_bins=self._num_spatial_bins,
global_pool=True)) global_pool=True))
class_predictions_with_background = tf.squeeze( class_predictions_with_background = tf.squeeze(
class_predictions_with_background, squeeze_dims=[2, 3]) class_predictions_with_background, axis=[2, 3])
class_predictions_with_background = tf.reshape( class_predictions_with_background = tf.reshape(
class_predictions_with_background, class_predictions_with_background,
[batch_size * num_boxes, 1, self._total_classes]) [batch_size * num_boxes, 1, self._total_classes])
......
...@@ -76,4 +76,8 @@ message EvalConfig { ...@@ -76,4 +76,8 @@ message EvalConfig {
// If True, additionally include per-category metrics. // If True, additionally include per-category metrics.
optional bool include_metrics_per_category = 24 [default=false]; optional bool include_metrics_per_category = 24 [default=false];
// Recall range within which precision should be computed.
optional float recall_lower_bound = 26 [default = 0.0];
optional float recall_upper_bound = 27 [default = 1.0];
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment