Commit b55044d8 authored by Jonathan Huang, committed by TF Object Detection Team

Add export ability to the LVIS evaluation class to enable submissions to the LVIS server.

PiperOrigin-RevId: 340366780
parent 33cca6c3
@@ -1192,6 +1192,11 @@ def evaluator_options_from_eval_config(eval_config):
           key = super_category
           value = eval_config.super_categories[key].split(',')
           evaluator_options[eval_metric_fn_key]['super_categories'][key] = value
+      if eval_metric_fn_key == 'lvis_mask_metrics' and hasattr(
+          eval_config, 'export_path'):
+        evaluator_options[eval_metric_fn_key].update({
+            'export_path': eval_config.export_path
+        })
     elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
       evaluator_options[eval_metric_fn_key] = {
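
The new branch above copies export_path from the eval config into the LVIS evaluator options. A minimal sketch of the resulting wiring, assuming an eval_config whose metrics_set includes 'lvis_mask_metrics' and which now carries an export_path field (the path value is illustrative):

# Sketch only: eval_config is assumed to request 'lvis_mask_metrics'
# and to define an export_path field.
evaluator_options = evaluator_options_from_eval_config(eval_config)
lvis_options = evaluator_options['lvis_mask_metrics']
print(lvis_options.get('export_path'))  # e.g. '/tmp/lvis_detections.json'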
@@ -1212,8 +1212,10 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
              'groundtruth_classes':
                  groundtruth_classes[:num_gt_box],
              'groundtruth_instance_masks':
-                 groundtruth_instance_masks[:num_gt_box][
-                     :original_image_shape[0], :original_image_shape[1]],
+                 groundtruth_instance_masks[
+                     :num_gt_box,
+                     :original_image_shape[0],
+                     :original_image_shape[1]],
              'groundtruth_is_crowd':
                  groundtruth_is_crowd[:num_gt_box]
          })
@@ -1221,8 +1223,10 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
          image_id, {
              'detection_scores': detection_scores[:num_det_box],
              'detection_classes': detection_classes[:num_det_box],
-             'detection_masks': detection_masks[:num_det_box][
-                 :original_image_shape[0], :original_image_shape[1]]
+             'detection_masks': detection_masks[
+                 :num_det_box,
+                 :original_image_shape[0],
+                 :original_image_shape[1]]
          })
      # Unpack items from the evaluation dictionary.
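
The slicing fix in the two hunks above matters: chained indexing like masks[:n][:h, :w] applies both bracket expressions starting from axis 0, so the width axis is never cropped at all. A minimal numpy sketch (array shapes are illustrative, not from the patch):

import numpy as np

masks = np.ones((10, 480, 640), dtype=np.uint8)  # [num_masks, height, width]
num_gt_box, h, w = 4, 100, 200

# Old, buggy chained indexing: the second [...] indexes the result of the
# first, so :h hits the mask-count axis and :w hits the height axis.
buggy = masks[:num_gt_box][:h, :w]
print(buggy.shape)  # (4, 200, 640) -- width untouched, height cropped by w

# Fixed single multi-axis slice: each axis is cropped exactly once.
fixed = masks[:num_gt_box, :h, :w]
print(fixed.shape)  # (4, 100, 200)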
@@ -40,13 +40,18 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
   """Class to evaluate LVIS mask metrics."""
 
   def __init__(self,
-               categories):
+               categories,
+               include_metrics_per_category=False,
+               export_path=None):
     """Constructor.
 
     Args:
       categories: A list of dicts, each of which has the following keys -
         'id': (required) an integer id uniquely identifying this category.
         'name': (required) string representing category name e.g., 'cat', 'dog'.
+      include_metrics_per_category: Additionally include per-category metrics
+        (this option is currently unsupported).
+      export_path: Path to export detections to LVIS compatible JSON format.
     """
     super(LVISMaskEvaluator, self).__init__(categories)
     self._image_ids_with_detections = set([])
@@ -57,6 +62,10 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
     self._image_id_to_mask_shape_map = {}
     self._image_id_to_verified_neg_classes = {}
     self._image_id_to_not_exhaustive_classes = {}
+    if include_metrics_per_category:
+      raise ValueError('include_metrics_per_category not yet supported '
+                       'for LVISMaskEvaluator.')
+    self._export_path = export_path
 
   def clear(self):
     """Clears the state to prepare for a fresh evaluation."""
@@ -86,10 +95,14 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
         InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape
           [num_masks, image_height, image_width] containing groundtruth masks.
           The elements of the array must be in {0, 1}.
-        InputDataFields.groundtruth_verified_neg_classes: [num_classes]
-          float indicator vector with values in {0, 1}.
-        InputDataFields.groundtruth_not_exhaustive_classes: [num_classes]
-          float indicator vector with values in {0, 1}.
+        InputDataFields.groundtruth_verified_neg_classes: [num_classes + 1]
+          float indicator vector with values in {0, 1}. The length is
+          num_classes + 1 so as to be compatible with the 1-indexed groundtruth
+          classes.
+        InputDataFields.groundtruth_not_exhaustive_classes: [num_classes + 1]
+          float indicator vector with values in {0, 1}. The length is
+          num_classes + 1 so as to be compatible with the 1-indexed groundtruth
+          classes.
         InputDataFields.groundtruth_area (optional): float numpy array of
           shape [num_boxes] containing the area (in the original absolute
           coordinates) of the annotated object.
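
To make the num_classes + 1 convention concrete, a small sketch (class count and flagged ids are illustrative):

import numpy as np

num_classes = 3
# Slot 0 is padding so that 1-indexed class ids index the vector directly.
verified_neg = np.zeros(num_classes + 1, dtype=np.float32)
verified_neg[2] = 1.  # class id 2 verified as absent from this image

# This is what the evaluator stores per image (see the flatnonzero call below):
print(np.flatnonzero(verified_neg))  # [2]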
@@ -116,9 +129,9 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
         fields.InputDataFields.groundtruth_verified_neg_classes].shape
     not_exhaustive_classes_shape = groundtruth_dict[
         fields.InputDataFields.groundtruth_not_exhaustive_classes].shape
-    if verified_neg_classes_shape != (len(self._category_id_set),):
+    if verified_neg_classes_shape != (len(self._category_id_set) + 1,):
       raise ValueError('Invalid shape for verified_neg_classes_shape.')
-    if not_exhaustive_classes_shape != (len(self._category_id_set),):
+    if not_exhaustive_classes_shape != (len(self._category_id_set) + 1,):
       raise ValueError('Invalid shape for not_exhaustive_classes_shape.')
     self._image_id_to_verified_neg_classes[image_id] = np.flatnonzero(
         groundtruth_dict[
@@ -210,6 +223,9 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
     Returns:
       A dictionary holding
     """
+    if self._export_path:
+      tf.logging.info('Dumping detections to json.')
+      self.dump_detections_to_json_file(self._export_path)
     tf.logging.info('Performing evaluation on %d images.',
                     len(self._image_id_to_mask_shape_map.keys()))
     # pylint: disable=g-complex-comprehension
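
With export_path set, evaluate() now dumps the accumulated detections before computing metrics. A usage sketch under the same assumptions as above (path illustrative; the add_single_* calls are elided):

evaluator = lvis_evaluation.LVISMaskEvaluator(
    categories, export_path='/tmp/lvis_results.json')
# ... add_single_ground_truth_image_info / add_single_detected_image_info ...
metrics = evaluator.evaluate()
# Side effect: /tmp/lvis_results.json now holds LVIS-format detections,
# ready for submission to the LVIS evaluation server.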
@@ -217,7 +233,7 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
         'annotations': self._groundtruth_list,
         'images': [
             {
-                'id': image_id,
+                'id': int(image_id),
                 'height': shape[1],
                 'width': shape[2],
                 'neg_category_ids':
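
For reference, each images entry assembled above ends up shaped roughly like this (all values illustrative):

image_entry = {
    'id': 397133,                        # int(image_id)
    'height': 480,                       # shape[1]
    'width': 640,                        # shape[2]
    'neg_category_ids': [2],             # from verified_neg_classes
    'not_exhaustive_category_ids': [3],  # from not_exhaustive_classes
}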
@@ -287,8 +303,10 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
              input_data_fields.groundtruth_classes:
                  groundtruth_classes[:num_gt_box],
              input_data_fields.groundtruth_instance_masks:
-                 groundtruth_instance_masks[:num_gt_box][
-                     :original_image_shape[0], :original_image_shape[1]],
+                 groundtruth_instance_masks[
+                     :num_gt_box,
+                     :original_image_shape[0],
+                     :original_image_shape[1]],
              input_data_fields.groundtruth_verified_neg_classes:
                  groundtruth_verified_neg_classes,
              input_data_fields.groundtruth_not_exhaustive_classes:
@@ -298,8 +316,10 @@ class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
          image_id, {
              'detection_scores': detection_scores[:num_det_box],
              'detection_classes': detection_classes[:num_det_box],
-             'detection_masks': detection_masks[:num_det_box][
-                 :original_image_shape[0], :original_image_shape[1]]
+             'detection_masks': detection_masks[
+                 :num_det_box,
+                 :original_image_shape[0],
+                 :original_image_shape[1]]
          })
      # Unpack items from the evaluation dictionary.
@@ -59,19 +59,19 @@ class LvisMaskEvaluationTest(tf.test.TestCase):
     lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(
         _get_categories_list())
     lvis_evaluator.add_single_ground_truth_image_info(
-        image_id='image1',
+        image_id=1,
         groundtruth_dict={
             fields.InputDataFields.groundtruth_boxes:
                 np.array([[100., 100., 200., 200.]]),
             fields.InputDataFields.groundtruth_classes: np.array([1]),
             fields.InputDataFields.groundtruth_instance_masks: masks1,
             fields.InputDataFields.groundtruth_verified_neg_classes:
-                np.array([0, 0, 0]),
+                np.array([0, 0, 0, 0]),
             fields.InputDataFields.groundtruth_not_exhaustive_classes:
-                np.array([0, 0, 0])
+                np.array([0, 0, 0, 0])
         })
     lvis_evaluator.add_single_detected_image_info(
-        image_id='image1',
+        image_id=1,
         detections_dict={
             fields.DetectionResultFields.detection_masks: masks1,
             fields.DetectionResultFields.detection_scores:
@@ -80,19 +80,19 @@ class LvisMaskEvaluationTest(tf.test.TestCase):
             np.array([1])
         })
     lvis_evaluator.add_single_ground_truth_image_info(
-        image_id='image2',
+        image_id=2,
         groundtruth_dict={
             fields.InputDataFields.groundtruth_boxes:
                 np.array([[50., 50., 100., 100.]]),
             fields.InputDataFields.groundtruth_classes: np.array([1]),
             fields.InputDataFields.groundtruth_instance_masks: masks2,
             fields.InputDataFields.groundtruth_verified_neg_classes:
-                np.array([0, 0, 0]),
+                np.array([0, 0, 0, 0]),
             fields.InputDataFields.groundtruth_not_exhaustive_classes:
-                np.array([0, 0, 0])
+                np.array([0, 0, 0, 0])
         })
     lvis_evaluator.add_single_detected_image_info(
-        image_id='image2',
+        image_id=2,
         detections_dict={
             fields.DetectionResultFields.detection_masks: masks2,
             fields.DetectionResultFields.detection_scores:
@@ -101,19 +101,19 @@ class LvisMaskEvaluationTest(tf.test.TestCase):
             np.array([1])
         })
     lvis_evaluator.add_single_ground_truth_image_info(
-        image_id='image3',
+        image_id=3,
         groundtruth_dict={
             fields.InputDataFields.groundtruth_boxes:
                 np.array([[25., 25., 50., 50.]]),
             fields.InputDataFields.groundtruth_classes: np.array([1]),
             fields.InputDataFields.groundtruth_instance_masks: masks3,
             fields.InputDataFields.groundtruth_verified_neg_classes:
-                np.array([0, 0, 0]),
+                np.array([0, 0, 0, 0]),
             fields.InputDataFields.groundtruth_not_exhaustive_classes:
-                np.array([0, 0, 0])
+                np.array([0, 0, 0, 0])
         })
     lvis_evaluator.add_single_detected_image_info(
-        image_id='image3',
+        image_id=3,
         detections_dict={
             fields.DetectionResultFields.detection_masks: masks3,
             fields.DetectionResultFields.detection_scores:
@@ -130,7 +130,7 @@ class LVISMaskEvaluationPyFuncTest(tf.test.TestCase):
 
   def testAddEvalDict(self):
     lvis_evaluator = lvis_evaluation.LVISMaskEvaluator(_get_categories_list())
-    image_id = tf.constant('image1', dtype=tf.string)
+    image_id = tf.constant(1, dtype=tf.int32)
     groundtruth_boxes = tf.constant(
         np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]),
         dtype=tf.float32)
@@ -143,9 +143,9 @@ class LVISMaskEvaluationPyFuncTest(tf.test.TestCase):
     ]), dtype=tf.uint8)
     original_image_spatial_shapes = tf.constant([[120, 120], [120, 120]],
                                                 dtype=tf.int32)
-    groundtruth_verified_neg_classes = tf.constant(np.array([0, 0, 0]),
+    groundtruth_verified_neg_classes = tf.constant(np.array([0, 0, 0, 0]),
                                                    dtype=tf.float32)
-    groundtruth_not_exhaustive_classes = tf.constant(np.array([0, 0, 0]),
+    groundtruth_not_exhaustive_classes = tf.constant(np.array([0, 0, 0, 0]),
                                                      dtype=tf.float32)
     detection_scores = tf.constant(np.array([.9, .8]), dtype=tf.float32)
     detection_classes = tf.constant(np.array([2, 1]), dtype=tf.float32)
@@ -134,7 +134,7 @@ def ExportSingleImageGroundtruthToLVIS(image_id,
     groundtruth bounding box.
 
   Args:
-    image_id: a unique image identifier either of type integer or string.
+    image_id: a unique image identifier castable to integer.
     next_annotation_id: integer specifying the first id to use for the
       groundtruth annotations. All annotations are assigned a continuous integer
       id starting from this value.
@@ -187,7 +187,7 @@ def ExportSingleImageGroundtruthToLVIS(image_id,
             'id':
                 next_annotation_id + i,
             'image_id':
-                image_id,
+                int(image_id),
             'category_id':
                 int(groundtruth_classes[i]),
             'bbox':
@@ -215,7 +215,7 @@ def ExportSingleImageDetectionMasksToLVIS(image_id,
     are associated with the same annotation.
 
   Args:
-    image_id: unique image identifier either of type integer or string.
+    image_id: unique image identifier castable to integer.
     category_id_set: A set of valid class ids. Detections with classes not in
       category_id_set are dropped.
     detection_masks: uint8 numpy array of shape [num_detections, image_height,
@@ -251,9 +251,10 @@ def ExportSingleImageDetectionMasksToLVIS(image_id,
   for i in range(num_boxes):
     if detection_classes[i] in category_id_set:
       detections_list.append({
-          'image_id': image_id,
+          'image_id': int(image_id),
           'category_id': int(detection_classes[i]),
          'segmentation': RleCompress(detection_masks[i]),
          'score': float(detection_scores[i])
       })
   return detections_list
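
Each record appended above therefore serializes cleanly to the LVIS results format. An illustrative example of one such record (all values, including the RLE payload, are made up; RleCompress produces a COCO-style size/counts dict):

import json

detection = {
    'image_id': 42,    # int(image_id)
    'category_id': 7,  # int(detection_classes[i])
    'segmentation': {'size': [480, 640], 'counts': '...'},
    'score': 0.83,     # float(detection_scores[i])
}
# dump_detections_to_json_file(export_path) serializes a list of these:
print(json.dumps([detection])[:72])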
@@ -35,7 +35,7 @@ class LVISToolsTest(tf.test.TestCase):
     groundtruth_annotations_list = [
         {
             'id': 1,
-            'image_id': 'first',
+            'image_id': 1,
             'category_id': 1,
             'bbox': [100., 100., 100., 100.],
             'area': 100.**2,
@@ -43,7 +43,7 @@ class LVISToolsTest(tf.test.TestCase):
         },
         {
             'id': 2,
-            'image_id': 'second',
+            'image_id': 2,
             'category_id': 1,
             'bbox': [50., 50., 50., 50.],
             'area': 50.**2,
@@ -52,14 +52,14 @@ class LVISToolsTest(tf.test.TestCase):
     ]
     image_list = [
         {
-            'id': 'first',
+            'id': 1,
             'neg_category_ids': [],
             'not_exhaustive_category_ids': [],
             'height': 256,
             'width': 256
         },
         {
-            'id': 'second',
+            'id': 2,
             'neg_category_ids': [],
             'not_exhaustive_category_ids': [],
             'height': 256,
@@ -77,13 +77,13 @@ class LVISToolsTest(tf.test.TestCase):
     self._detections_list = [
         {
-            'image_id': 'first',
+            'image_id': 1,
             'category_id': 1,
             'segmentation': mask1_rle,
             'score': .8
         },
         {
-            'image_id': 'second',
+            'image_id': 2,
             'category_id': 1,
             'segmentation': mask2_rle,
             'score': .7
@@ -106,7 +106,7 @@ class LVISToolsTest(tf.test.TestCase):
     classes = np.array([1, 2, 3], dtype=np.int32)
     scores = np.array([0.8, 0.2, 0.7], dtype=np.float32)
     lvis_annotations = lvis_tools.ExportSingleImageDetectionMasksToLVIS(
-        image_id='first_image',
+        image_id=1,
         category_id_set=set([1, 2, 3]),
         detection_classes=classes,
         detection_scores=scores,
@@ -117,7 +117,7 @@ class LVISToolsTest(tf.test.TestCase):
                        expected_counts[i])
       self.assertTrue(np.all(np.equal(mask.decode(
           mask_annotation['segmentation']), masks[i])))
-      self.assertEqual(mask_annotation['image_id'], 'first_image')
+      self.assertEqual(mask_annotation['image_id'], 1)
       self.assertEqual(mask_annotation['category_id'], classes[i])
       self.assertAlmostEqual(mask_annotation['score'], scores[i])
@@ -137,7 +137,7 @@ class LVISToolsTest(tf.test.TestCase):
     expected_counts = ['04', '31', '4']
     lvis_annotations = lvis_tools.ExportSingleImageGroundtruthToLVIS(
-        image_id='first_image',
+        image_id=1,
         category_id_set=set([1, 2, 3]),
         next_annotation_id=next_annotation_id,
         groundtruth_boxes=boxes,
@@ -149,7 +149,7 @@ class LVISToolsTest(tf.test.TestCase):
       self.assertTrue(np.all(np.equal(mask.decode(
           annotation['segmentation']), masks[i])))
       self.assertTrue(np.all(np.isclose(annotation['bbox'], lvis_boxes[i])))
-      self.assertEqual(annotation['image_id'], 'first_image')
+      self.assertEqual(annotation['image_id'], 1)
       self.assertEqual(annotation['category_id'], classes[i])
       self.assertEqual(annotation['id'], i + next_annotation_id)
@@ -772,9 +772,12 @@ def eager_eval_loop(
     true_image_shapes = None
     original_image_spatial_shapes = None
+    keys = features[inputs.HASH_KEY]
+    if eval_input_config.include_source_id:
+      keys = features[fields.InputDataFields.source_id]
     eval_dict = eval_util.result_dict_for_batched_example(
         eval_images,
-        features[inputs.HASH_KEY],
+        keys,
         detections,
         groundtruth,
         class_agnostic=class_agnostic,
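
Because LVIS image ids must be castable to int, the eval loop now prefers the dataset's source_id over the opaque hash key whenever include_source_id is set. A toy, self-contained version of that selection (dictionary keys are illustrative stand-ins for the feature fields in the patch):

def select_eval_keys(features, include_source_id):
  """Toy version of the key selection added above."""
  keys = features['hash_key']
  if include_source_id:
    # source_id is the original dataset id, castable to int for LVIS.
    keys = features['source_id']
  return keys

features = {'hash_key': ['0x9af3'], 'source_id': ['397133']}
print(select_eval_keys(features, include_source_id=True))  # ['397133']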