ModelZoo / ResNet50_tensorflow · Commits

Commit 2edd8ccd
Authored Sep 22, 2021 by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 398347703
Parent: b1d0bf77

Changes: 5 changed files with 254 additions and 53 deletions (+254, -53)
official/requirements.txt                                     +1    -0
official/vision/beta/configs/maskrcnn.py                      +4    -0
official/vision/beta/dataloaders/maskrcnn_input.py            +1    -1
official/vision/beta/evaluation/wod_detection_evaluator.py    +161  -0
official/vision/beta/tasks/maskrcnn.py                        +87   -52
official/requirements.txt

@@ -21,6 +21,7 @@ pyyaml>=5.1
 opencv-python-headless
 Pillow
 pycocotools
+waymo-open-dataset-tf-2-6-0
 # NLP related dependencies
 seqeval
 sentencepiece
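The only dependency change is the pinned Waymo Open Dataset package that the new evaluator imports. As a quick, hedged sanity check that an install resolves correctly (module paths taken from the imports in the new evaluator file below, not from the diff itself):

import waymo_open_dataset                      # provided by waymo-open-dataset-tf-2-6-0
from waymo_open_dataset.protos import breakdown_pb2, metrics_pb2
from waymo_open_dataset.metrics.python import wod_detection_evaluator

print(metrics_pb2.Config())                    # an empty config proto prints as ''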
official/vision/beta/configs/maskrcnn.py

@@ -208,6 +208,10 @@ class MaskRCNNTask(cfg.TaskConfig):
   per_category_metrics: bool = False
   # If set, we only use masks for the specified class IDs.
   allowed_mask_class_ids: Optional[List[int]] = None
+  # If set, the COCO metrics will be computed.
+  use_coco_metrics: bool = True
+  # If set, the Waymo Open Dataset evaluator would be used.
+  use_wod_metrics: bool = False


 COCO_INPUT_PATH_BASE = 'coco'
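The two new task flags default to COCO-only evaluation. A minimal sketch of flipping them from Python follows; it assumes the config dataclass above is importable as official.vision.beta.configs.maskrcnn and that its defaults construct cleanly (the alias maskrcnn_cfg is just for illustration):

from official.vision.beta.configs import maskrcnn as maskrcnn_cfg

task_config = maskrcnn_cfg.MaskRCNNTask()
task_config.use_coco_metrics = False   # skip the default COCO evaluator
task_config.use_wod_metrics = True     # enable the Waymo Open Dataset evaluator instead

The equivalent YAML override would presumably set the same fields under the task section, e.g. task: {use_wod_metrics: true}.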
official/vision/beta/dataloaders/maskrcnn_input.py

@@ -331,7 +331,7 @@ class Parser(parser.Parser):
         'source_id': data['source_id'],
         'height': data['height'],
         'width': data['width'],
-        'num_detections': tf.shape(data['groundtruth_classes']),
+        'num_detections': tf.shape(data['groundtruth_classes'])[0],
         'boxes': boxes,
         'classes': data['groundtruth_classes'],
         'areas': data['groundtruth_area'],
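The one-line change makes num_detections a scalar count rather than a rank-1 shape tensor, which is what the evaluators expect when they slice off per-image padding. A small illustration in plain TensorFlow (not taken from the diff):

import tensorflow as tf

groundtruth_classes = tf.constant([3, 1, 2])   # three boxes in this toy image
tf.shape(groundtruth_classes)      # -> [3], a rank-1 shape tensor
tf.shape(groundtruth_classes)[0]   # -> 3, the scalar detection count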
official/vision/beta/evaluation/wod_detection_evaluator.py (new file, 0 → 100644)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""2D detection evaluator for the Waymo Open Dataset."""
import pprint

from absl import logging
import tensorflow as tf

from official.vision.beta.ops import box_ops
from waymo_open_dataset import label_pb2
from waymo_open_dataset.metrics.python import wod_detection_evaluator
from waymo_open_dataset.protos import breakdown_pb2
from waymo_open_dataset.protos import metrics_pb2


def get_2d_detection_default_config():
  """Returns the config proto for WOD 2D detection Evaluation."""
  config = metrics_pb2.Config()

  config.breakdown_generator_ids.append(breakdown_pb2.Breakdown.OBJECT_TYPE)
  difficulty = config.difficulties.add()
  difficulty.levels.append(label_pb2.Label.LEVEL_1)
  difficulty.levels.append(label_pb2.Label.LEVEL_2)
  config.breakdown_generator_ids.append(breakdown_pb2.Breakdown.ALL_BUT_SIGN)
  difficulty = config.difficulties.add()
  difficulty.levels.append(label_pb2.Label.LEVEL_1)
  difficulty.levels.append(label_pb2.Label.LEVEL_2)

  config.matcher_type = metrics_pb2.MatcherProto.TYPE_HUNGARIAN
  config.iou_thresholds.append(0.0)
  config.iou_thresholds.append(0.7)
  config.iou_thresholds.append(0.5)
  config.iou_thresholds.append(0.5)
  config.iou_thresholds.append(0.5)
  config.box_type = label_pb2.Label.Box.TYPE_2D

  for i in range(100):
    config.score_cutoffs.append(i * 0.01)
  config.score_cutoffs.append(1.0)

  return config


class WOD2dDetectionEvaluator(wod_detection_evaluator.WODDetectionEvaluator):
  """WOD 2D detection evaluation metric class."""

  def __init__(self, config=None):
    if config is None:
      config = get_2d_detection_default_config()
    super().__init__(config=config)

  def _remove_padding(self, tensor_dict, num_valid):
    """Remove the paddings of the prediction/groundtruth data."""
    result_tensor_dict = {}
    gather_indices = tf.range(num_valid)
    for k, v in tensor_dict.items():
      if 'frame_id' in k:
        result_tensor_dict[k] = tf.tile([v], [num_valid])
      else:
        result_tensor_dict[k] = tf.gather(v, gather_indices)
    return result_tensor_dict

  def update_state(self, groundtruths, predictions):
    """Update the metrics state with prediction and groundtruth data.

    Args:
      groundtruths: a dictionary of Tensors including the fields below.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
          - num_detections: a numpy array of int of shape [batch_size].
          - boxes: a numpy array of float of shape [batch_size, K, 4].
          - classes: a numpy array of int of shape [batch_size, K].
          - difficulties: a numpy array of int of shape [batch_size, K].
      predictions: a dictionary of tensors including the fields below.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
          - image_info: a numpy array of float of shape [batch_size, 4, 2].
          - num_detections: a numpy array of int of shape [batch_size].
          - detection_boxes: a numpy array of float of shape [batch_size, K, 4].
          - detection_classes: a numpy array of int of shape [batch_size, K].
          - detection_scores: a numpy array of float of shape [batch_size, K].
    """
    # Preprocess potentially aggregated tensors.
    for k, v in groundtruths.items():
      if isinstance(v, tuple):
        groundtruths[k] = tf.concat(v, axis=0)
    for k, v in predictions.items():
      if isinstance(v, tuple):
        predictions[k] = tf.concat(v, axis=0)

    # Change cyclists' type id from 3 to 4, where 3 is reserved for sign.
    groundtruth_type = tf.cast(groundtruths['classes'], tf.uint8)
    groundtruth_type = tf.where(
        tf.equal(groundtruth_type, 3),
        tf.ones_like(groundtruth_type) * 4, groundtruth_type)
    prediction_type = tf.cast(predictions['detection_classes'], tf.uint8)
    prediction_type = tf.where(
        tf.equal(prediction_type, 3),
        tf.ones_like(prediction_type) * 4, prediction_type)

    # Rescale the detection boxes back to original scale.
    image_scale = tf.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
    prediction_bbox = predictions['detection_boxes'] / image_scale

    batch_size = tf.shape(groundtruths['source_id'])[0]
    for i in tf.range(batch_size):
      frame_groundtruths = {
          'ground_truth_frame_id': groundtruths['source_id'][i],
          'ground_truth_bbox': box_ops.yxyx_to_cycxhw(
              tf.cast(groundtruths['boxes'][i], tf.float32)),
          'ground_truth_type': groundtruth_type[i],
          'ground_truth_difficulty': tf.cast(
              groundtruths['difficulties'][i], tf.uint8),
      }
      frame_groundtruths = self._remove_padding(
          frame_groundtruths, groundtruths['num_detections'][i])
      frame_predictions = {
          'prediction_frame_id': groundtruths['source_id'][i],
          'prediction_bbox': box_ops.yxyx_to_cycxhw(
              tf.cast(prediction_bbox[i], tf.float32)),
          'prediction_type': prediction_type[i],
          'prediction_score': tf.cast(
              predictions['detection_scores'][i], tf.float32),
          'prediction_overlap_nlz': tf.zeros_like(
              predictions['detection_scores'][i], dtype=tf.bool)
      }
      frame_predictions = self._remove_padding(
          frame_predictions, predictions['num_detections'][i])
      super().update_state(frame_groundtruths, frame_predictions)

  def evaluate(self):
    """Compute the final metrics."""
    ap, _, _, _, _ = super().evaluate()
    metric_dict = {}
    for i, name in enumerate(self._breakdown_names):
      # Skip sign metrics in 2d detection task.
      if 'SIGN' in name:
        continue
      metric_dict['WOD metrics/{}/AP'.format(name)] = ap[i]
    pp = pprint.PrettyPrinter()
    logging.info('WOD Detection Metrics: \n%s', pp.pformat(metric_dict))
    return metric_dict
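To show how the pieces above fit together outside of the task, here is an illustrative, untested sketch that pushes one toy frame through WOD2dDetectionEvaluator. The dictionary keys and shapes follow the update_state docstring; the concrete values are made up, and running it requires the waymo-open-dataset package added in requirements.txt:

import numpy as np
from official.vision.beta.evaluation import wod_detection_evaluator

evaluator = wod_detection_evaluator.WOD2dDetectionEvaluator()

# One image with a single groundtruth vehicle (class 1); boxes are in yxyx order.
groundtruths = {
    'source_id': np.array([1], dtype=np.int64),
    'num_detections': np.array([1], dtype=np.int32),
    'boxes': np.array([[[100., 100., 300., 400.]]], dtype=np.float32),
    'classes': np.array([[1]], dtype=np.int32),
    'difficulties': np.array([[1]], dtype=np.int32),
}
# A single matching prediction. image_info rows are assumed to follow the Model
# Garden convention of (original size, scaled size, y/x scale, offset).
predictions = {
    'source_id': np.array([1], dtype=np.int64),
    'image_info': np.array(
        [[[640., 640.], [640., 640.], [1., 1.], [0., 0.]]], dtype=np.float32),
    'num_detections': np.array([1], dtype=np.int32),
    'detection_boxes': np.array([[[110., 105., 290., 390.]]], dtype=np.float32),
    'detection_classes': np.array([[1]], dtype=np.int32),
    'detection_scores': np.array([[0.9]], dtype=np.float32),
}

evaluator.update_state(groundtruths, predictions)
metrics = evaluator.evaluate()   # dict of 'WOD metrics/<breakdown>/AP' values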
official/vision/beta/tasks/maskrcnn.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""RetinaNet task definition."""
+"""MaskRCNN task definition."""
 import os
 from typing import Any, Optional, List, Tuple, Mapping

@@ -28,6 +28,7 @@ from official.vision.beta.dataloaders import tf_example_decoder
 from official.vision.beta.dataloaders import tf_example_label_map_decoder
 from official.vision.beta.evaluation import coco_evaluator
 from official.vision.beta.evaluation import coco_utils
+from official.vision.beta.evaluation import wod_detection_evaluator
 from official.vision.beta.losses import maskrcnn_losses
 from official.vision.beta.modeling import factory

@@ -247,23 +248,8 @@ class MaskRCNNTask(base_task.Task):
     }
     return losses

-  def build_metrics(self, training: bool = True):
-    """Build detection metrics."""
-    metrics = []
-    if training:
-      metric_names = [
-          'total_loss',
-          'rpn_score_loss',
-          'rpn_box_loss',
-          'frcnn_cls_loss',
-          'frcnn_box_loss',
-          'mask_loss',
-          'model_loss'
-      ]
-      for name in metric_names:
-        metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
-    else:
+  def _build_coco_metrics(self):
+    """Build COCO metrics evaluator."""
+    if (not self._task_config.model.include_mask
+       ) or self._task_config.annotation_file:
       self.coco_metric = coco_evaluator.COCOEvaluator(

@@ -295,6 +281,28 @@ class MaskRCNNTask(base_task.Task):
           include_mask=self._task_config.model.include_mask,
           per_category_metrics=self._task_config.per_category_metrics)

+  def build_metrics(self, training: bool = True):
+    """Build detection metrics."""
+    metrics = []
+    if training:
+      metric_names = [
+          'total_loss',
+          'rpn_score_loss',
+          'rpn_box_loss',
+          'frcnn_cls_loss',
+          'frcnn_box_loss',
+          'mask_loss',
+          'model_loss'
+      ]
+      for name in metric_names:
+        metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
+    else:
+      if self._task_config.use_coco_metrics:
+        self._build_coco_metrics()
+      if self._task_config.use_wod_metrics:
+        self.wod_metric = wod_detection_evaluator.WOD2dDetectionEvaluator()
+
+    return metrics
+
   def train_step(self,

@@ -376,6 +384,7 @@ class MaskRCNNTask(base_task.Task):
         training=False)

     logs = {self.loss: 0}
+    if self._task_config.use_coco_metrics:
       coco_model_outputs = {
           'detection_boxes': outputs['detection_boxes'],
           'detection_scores': outputs['detection_scores'],

@@ -388,19 +397,45 @@ class MaskRCNNTask(base_task.Task):
         coco_model_outputs.update({
             'detection_masks': outputs['detection_masks'],
         })
-    logs.update({
-        self.coco_metric.name: (labels['groundtruths'], coco_model_outputs)
-    })
+      logs.update(
+          {self.coco_metric.name: (labels['groundtruths'], coco_model_outputs)})
+    if self.task_config.use_wod_metrics:
+      wod_model_outputs = {
+          'detection_boxes': outputs['detection_boxes'],
+          'detection_scores': outputs['detection_scores'],
+          'detection_classes': outputs['detection_classes'],
+          'num_detections': outputs['num_detections'],
+          'source_id': labels['groundtruths']['source_id'],
+          'image_info': labels['image_info']
+      }
+      logs.update(
+          {self.wod_metric.name: (labels['groundtruths'], wod_model_outputs)})
     return logs

   def aggregate_logs(self, state=None, step_outputs=None):
+    if self._task_config.use_coco_metrics:
       if state is None:
         self.coco_metric.reset_states()
         state = self.coco_metric
       self.coco_metric.update_state(
           step_outputs[self.coco_metric.name][0],
           step_outputs[self.coco_metric.name][1])
+    if self._task_config.use_wod_metrics:
+      if state is None:
+        self.wod_metric.reset_states()
+      self.wod_metric.update_state(
+          step_outputs[self.wod_metric.name][0],
+          step_outputs[self.wod_metric.name][1])
+    if state is None:
+      # Create an arbitrary state to indicate it's not the first step in the
+      # following calls to this function.
+      state = True
     return state

   def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
-    return self.coco_metric.result()
+    logs = {}
+    if self._task_config.use_coco_metrics:
+      logs.update(self.coco_metric.result())
+    if self._task_config.use_wod_metrics:
+      logs.update(self.wod_metric.result())
+    return logs
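Putting the task-level changes together, the intended evaluation flow is roughly the following. This is a hedged sketch, not taken from the commit: it assumes the usual base_task.Task orchestration by the Model Garden trainer, and task_config, model, and eval_dataset are placeholders:

from official.vision.beta.tasks import maskrcnn

task = maskrcnn.MaskRCNNTask(task_config)
metrics = task.build_metrics(training=False)   # builds coco_metric and/or wod_metric

state = None
for inputs in eval_dataset:
  # validation_step returns per-batch logs keyed by evaluator name.
  logs = task.validation_step(inputs, model, metrics)
  state = task.aggregate_logs(state=state, step_outputs=logs)

# With use_wod_metrics=True this now merges COCO and WOD results into one dict.
results = task.reduce_aggregated_logs(state)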