Merge remote-tracking branch 'upstream/master' into fasterrcnn_fpn_keras_feature_extractor

b1025b3b · syiming · 69ce1c45 · e9df75ab · b1025b3b · b1025b3b
Commit b1025b3b authored Jun 18, 2020 by syiming
20 changed files
--- a/research/object_detection/metrics/calibration_evaluation_test.py
+++ b/research/object_detection/metrics/calibration_evaluation_test.py
@@ -18,9 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import unittest
 import tensorflow.compat.v1 as tf
 from object_detection.core import standard_fields
 from object_detection.metrics import calibration_evaluation
+from object_detection.utils import tf_version
 def _get_categories_list():
@@ -36,6 +38,7 @@ def _get_categories_list():
  }]
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class CalibrationDetectionEvaluationTest(tf.test.TestCase):
  def _get_ece(self, ece_op, update_op):

--- a/research/object_detection/metrics/calibration_metrics_test.py
+++ b/research/object_detection/metrics/calibration_metrics_test.py
@@ -18,11 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf
 from object_detection.metrics import calibration_metrics
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class CalibrationLibTest(tf.test.TestCase):
  @staticmethod

--- a/research/object_detection/metrics/coco_evaluation.py
+++ b/research/object_detection/metrics/coco_evaluation.py
@@ -24,6 +24,7 @@ import tensorflow.compat.v1 as tf
 from object_detection.core import standard_fields
 from object_detection.metrics import coco_tools
 from object_detection.utils import json_utils
+from object_detection.utils import np_mask_ops
 from object_detection.utils import object_detection_evaluation
@@ -1263,3 +1264,535 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
        eval_metric_ops[metric_name] = (tf.py_func(
            value_func_factory(metric_name), [], np.float32), update_op)
    return eval_metric_ops
+class CocoPanopticSegmentationEvaluator(
+    object_detection_evaluation.DetectionEvaluator):
+  """Class to evaluate PQ (panoptic quality) metric on COCO dataset.
+  More details about this metric: https://arxiv.org/pdf/1801.00868.pdf.
+  """
+  def __init__(self,
+               categories,
+               include_metrics_per_category=False,
+               iou_threshold=0.5,
+               ioa_threshold=0.5):
+    """Initializes the evaluator with empty groundtruth/prediction stores.
+    Args:
+      categories: A list of dicts, one per category, each providing -
+        'id': (required) an integer id uniquely identifying this category.
+        'name': (required) string with the category name e.g., 'cat', 'dog'.
+      include_metrics_per_category: If True, metrics are also reported for
+        each category.
+      iou_threshold: intersection-over-union threshold used when matching
+        masks against normal groundtruths.
+      ioa_threshold: intersection-over-area threshold used when matching
+        masks against "is_crowd" groundtruths.
+    """
+    super(CocoPanopticSegmentationEvaluator, self).__init__(categories)
+    # Matching and reporting configuration.
+    self._iou_threshold = iou_threshold
+    self._ioa_threshold = ioa_threshold
+    self._include_metrics_per_category = include_metrics_per_category
+    # Per-image stores, filled by the add_single_* methods and keyed by
+    # image id.
+    self._groundtruth_masks = {}
+    self._groundtruth_class_labels = {}
+    self._groundtruth_is_crowd = {}
+    self._predicted_masks = {}
+    self._predicted_class_labels = {}
+  def clear(self):
+    """Empties every per-image store so a new evaluation can start."""
+    for store in (self._groundtruth_masks, self._groundtruth_class_labels,
+                  self._groundtruth_is_crowd, self._predicted_masks,
+                  self._predicted_class_labels):
+      store.clear()
+  def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
+    """Records the groundtruth of one image for later evaluation.
+    Groundtruth for an image id that has already been recorded is ignored and
+    a warning is logged.
+    Args:
+      image_id: A unique string/integer identifier for the image.
+      groundtruth_dict: A dictionary containing -
+        InputDataFields.groundtruth_classes: integer numpy array of shape
+          [num_masks] containing 1-indexed groundtruth classes for the mask.
+        InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape
+          [num_masks, image_height, image_width] containing groundtruth masks.
+          The elements of the array must be in {0, 1}.
+        InputDataFields.groundtruth_is_crowd (optional): integer numpy array of
+          shape [num_masks] holding the iscrowd flag of each groundtruth. An
+          empty array is treated the same as an absent one.
+    """
+    fields = standard_fields.InputDataFields
+    if image_id in self._groundtruth_masks:
+      tf.logging.warning(
+          'Ignoring groundtruth with image %s, since it has already been '
+          'added to the ground truth database.', image_id)
+      return
+    self._groundtruth_masks[image_id] = groundtruth_dict[
+        fields.groundtruth_instance_masks]
+    self._groundtruth_class_labels[image_id] = groundtruth_dict[
+        fields.groundtruth_classes]
+    is_crowd = groundtruth_dict.get(fields.groundtruth_is_crowd)
+    # Only a present, non-empty is_crowd array is stored for the image.
+    if is_crowd is not None and is_crowd.size > 0:
+      self._groundtruth_is_crowd[image_id] = is_crowd
+  def add_single_detected_image_info(self, image_id, detections_dict):
+    """Adds detections for a single image to be used for evaluation.
+    If a detection has already been added for this image id, a warning is
+    logged, and the detection is skipped.
+    Args:
+      image_id: A unique string/integer identifier for the image.
+      detections_dict: A dictionary containing -
+        DetectionResultFields.detection_classes: integer numpy array of shape
+          [num_masks] containing 1-indexed detection classes for the masks.
+        DetectionResultFields.detection_masks: uint8 numpy array of shape
+          [num_masks, image_height, image_width] containing instance masks.
+          The elements of the array must be in {0, 1}.
+    Raises:
+      ValueError: If groundtruth for the image is missing, or if the
+        [image_height, image_width] shape of the detection masks doesn't match
+        the groundtruth masks.
+    """
+    if image_id not in self._groundtruth_masks:
+      raise ValueError('Missing groundtruth for image id: {}'.format(image_id))
+    # Honour the documented contract: a repeated image id is skipped instead
+    # of silently overwriting the detections stored earlier.
+    if image_id in self._predicted_masks:
+      tf.logging.warning(
+          'Ignoring detection with image id %s since it was previously added',
+          image_id)
+      return
+    detection_masks = detections_dict[
+        standard_fields.DetectionResultFields.detection_masks]
+    detection_classes = detections_dict[
+        standard_fields.DetectionResultFields.detection_classes]
+    # Validate before storing, so a rejected detection leaves no partial state
+    # behind in the evaluator.
+    groundtruth_mask_shape = self._groundtruth_masks[image_id].shape
+    if groundtruth_mask_shape[1:] != detection_masks.shape[1:]:
+      raise ValueError("The shape of results doesn't match groundtruth.")
+    self._predicted_masks[image_id] = detection_masks
+    self._predicted_class_labels[image_id] = detection_classes
+  def evaluate(self):
+    """Computes the panoptic metrics over everything added so far.
+    Returns:
+      A dictionary holding -
+      1. summary_metric:
+      'PanopticQuality@%.2fIOU': mean panoptic quality averaged over classes at
+        the required IOU.
+      'SegmentationQuality@%.2fIOU': mean segmentation quality averaged over
+        classes at the required IOU.
+      'RecognitionQuality@%.2fIOU': mean recognition quality averaged over
+        classes at the required IOU.
+      'NumValidClasses': number of valid classes. A valid class should have at
+        least one normal (is_crowd=0) groundtruth mask or one predicted mask.
+      'NumTotalClasses': number of total classes.
+      2. per_category_pq: if include_metrics_per_category is True, category
+      specific results with keys of the form:
+      'PanopticQuality@%.2fIOU_ByCategory/category'.
+    """
+    # Accumulate the per-class iou/tp/fp/fn over all images, then reduce them
+    # to per-class PQ metrics and their average over classes.
+    accumulated_stats = self._evaluate_all_masks()
+    return self._compute_panoptic_metrics(*accumulated_stats)
+  def get_estimator_eval_metric_ops(self, eval_dict):
+    """Returns a dictionary of eval metric ops.
+    Note that once value_op is called, the detections and groundtruth added via
+    update_op are cleared.
+    Args:
+      eval_dict: A dictionary that holds tensors for evaluating object detection
+        performance. For single-image evaluation, this dictionary may be
+        produced from eval_util.result_dict_for_single_example(). If multi-image
+        evaluation, `eval_dict` should contain the fields
+        'num_gt_masks_per_image' and 'num_det_masks_per_image' to properly unpad
+        the tensors from the batch.
+    Returns:
+      a dictionary of metric names to tuple of value_op and update_op that can
+      be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all
+      update ops must be run together and similarly all value ops must be run
+      together to guarantee correct behaviour.
+    """
+    # Python callback wrapped by tf.py_func below: it walks the batch image by
+    # image and feeds groundtruth and detections into this evaluator.
+    def update_op(image_id_batched, groundtruth_classes_batched,
+                  groundtruth_instance_masks_batched,
+                  groundtruth_is_crowd_batched, num_gt_masks_per_image,
+                  detection_classes_batched, detection_masks_batched,
+                  num_det_masks_per_image):
+      """Update op for metrics."""
+      for (image_id, groundtruth_classes, groundtruth_instance_masks,
+           groundtruth_is_crowd, num_gt_mask, detection_classes,
+           detection_masks, num_det_mask) in zip(
+               image_id_batched, groundtruth_classes_batched,
+               groundtruth_instance_masks_batched, groundtruth_is_crowd_batched,
+               num_gt_masks_per_image, detection_classes_batched,
+               detection_masks_batched, num_det_masks_per_image):
+        # Keep only the first num_gt_mask / num_det_mask entries of each
+        # per-image array; the rest is treated as batch padding.
+        self.add_single_ground_truth_image_info(
+            image_id, {
+                'groundtruth_classes':
+                    groundtruth_classes[:num_gt_mask],
+                'groundtruth_instance_masks':
+                    groundtruth_instance_masks[:num_gt_mask],
+                'groundtruth_is_crowd':
+                    groundtruth_is_crowd[:num_gt_mask]
+            })
+        self.add_single_detected_image_info(
+            image_id, {
+                'detection_classes': detection_classes[:num_det_mask],
+                'detection_masks': detection_masks[:num_det_mask]
+            })
+    # Unpack items from the evaluation dictionary.
+    (image_id, groundtruth_classes, groundtruth_instance_masks,
+     groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+     detection_masks, num_det_masks_per_image
+    ) = self._unpack_evaluation_dictionary_items(eval_dict)
+    # From here on the name `update_op` refers to the graph op, not to the
+    # Python callback defined above.
+    update_op = tf.py_func(update_op, [
+        image_id, groundtruth_classes, groundtruth_instance_masks,
+        groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+        detection_masks, num_det_masks_per_image
+    ], [])
+    metric_names = [
+        'PanopticQuality@%.2fIOU' % self._iou_threshold,
+        'SegmentationQuality@%.2fIOU' % self._iou_threshold,
+        'RecognitionQuality@%.2fIOU' % self._iou_threshold
+    ]
+    # NOTE(review): a per-category name is registered for every category here,
+    # while _compute_panoptic_metrics appears to emit a per-category entry only
+    # for valid classes; value_func may raise KeyError for the others. Confirm.
+    if self._include_metrics_per_category:
+      for category_dict in self._categories:
+        metric_names.append('PanopticQuality@%.2fIOU_ByCategory/%s' %
+                            (self._iou_threshold, category_dict['name']))
+    # Runs the evaluation, caches all metrics on self._metrics and clears the
+    # accumulated groundtruth/detections.
+    def first_value_func():
+      self._metrics = self.evaluate()
+      self.clear()
+      return np.float32(self._metrics[metric_names[0]])
+    # Builds a callback that only reads one cached metric from self._metrics.
+    def value_func_factory(metric_name):
+      def value_func():
+        return np.float32(self._metrics[metric_name])
+      return value_func
+    # Ensure that the metrics are only evaluated once.
+    first_value_op = tf.py_func(first_value_func, [], tf.float32)
+    eval_metric_ops = {metric_names[0]: (first_value_op, update_op)}
+    # The remaining value ops are created under a control dependency on
+    # first_value_op, so self._metrics is populated before they read it.
+    with tf.control_dependencies([first_value_op]):
+      for metric_name in metric_names[1:]:
+        eval_metric_ops[metric_name] = (tf.py_func(
+            value_func_factory(metric_name), [], np.float32), update_op)
+    return eval_metric_ops
+  def _evaluate_all_masks(self):
+    """Evaluates all images and accumulates per-class IoU/TP/FP/FN.
+    An image with groundtruth but no added detections is evaluated as having
+    zero predictions, so its normal groundtruths count as false negatives.
+    Returns:
+      sum_tp_iou: dict keyed by class_id, summed overlap of true positives.
+      sum_num_tp: dict keyed by class_id, number of true positives.
+      sum_num_fp: dict keyed by class_id, number of false positives.
+      sum_num_fn: dict keyed by class_id, number of false negatives.
+    """
+    sum_num_tp = {category['id']: 0 for category in self._categories}
+    sum_num_fp = sum_num_tp.copy()
+    sum_num_fn = sum_num_tp.copy()
+    sum_tp_iou = sum_num_tp.copy()
+    for image_id in self._groundtruth_class_labels:
+      groundtruth_masks = self._groundtruth_masks[image_id]
+      groundtruth_classes = self._groundtruth_class_labels[image_id]
+      # Separate normal and is_crowd groundtruth.
+      crowd_gt_indices = self._groundtruth_is_crowd.get(image_id)
+      (normal_gt_masks, normal_gt_classes, crowd_gt_masks,
+       crowd_gt_classes) = self._separate_normal_and_crowd_labels(
+           crowd_gt_indices, groundtruth_masks, groundtruth_classes)
+      if image_id in self._predicted_masks:
+        predicted_masks = self._predicted_masks[image_id]
+        predicted_class_labels = self._predicted_class_labels[image_id]
+      else:
+        # No detections were added for this image. Empty slices of the
+        # groundtruth arrays keep the [0, height, width] / [0] layout, so the
+        # image is scored with zero predictions instead of raising KeyError.
+        predicted_masks = groundtruth_masks[:0]
+        predicted_class_labels = groundtruth_classes[:0]
+      # Mask matching to normal GT.
+      (overlaps, pred_matched,
+       gt_matched) = self._match_predictions_to_groundtruths(
+           predicted_masks,
+           predicted_class_labels,
+           normal_gt_masks,
+           normal_gt_classes,
+           self._iou_threshold,
+           is_crowd=False,
+           with_replacement=False)
+      # Accumulate true positives.
+      for (class_id, is_matched, overlap) in zip(predicted_class_labels,
+                                                 pred_matched, overlaps):
+        if is_matched:
+          sum_num_tp[class_id] += 1
+          sum_tp_iou[class_id] += overlap
+      # Accumulate false negatives.
+      for (class_id, is_matched) in zip(normal_gt_classes, gt_matched):
+        if not is_matched:
+          sum_num_fn[class_id] += 1
+      # Match remaining predictions to crowd gt. Predictions that overlap a
+      # crowd region enough are neither true nor false positives.
+      remained_pred_indices = np.logical_not(pred_matched)
+      remained_pred_masks = predicted_masks[remained_pred_indices, :, :]
+      remained_pred_classes = predicted_class_labels[remained_pred_indices]
+      _, pred_matched, _ = self._match_predictions_to_groundtruths(
+          remained_pred_masks,
+          remained_pred_classes,
+          crowd_gt_masks,
+          crowd_gt_classes,
+          self._ioa_threshold,
+          is_crowd=True,
+          with_replacement=True)
+      # Accumulate false positives.
+      for (class_id, is_matched) in zip(remained_pred_classes, pred_matched):
+        if not is_matched:
+          sum_num_fp[class_id] += 1
+    return sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn
+  def _compute_panoptic_metrics(self, sum_tp_iou, sum_num_tp, sum_num_fp,
+                                sum_num_fn):
+    """Compute PQ metric for each category and average over all classes.
+    Only valid classes, i.e. classes for which the single-class metrics are
+    defined, contribute to the averages. If no class is valid, the averaged
+    metrics are reported as 0.0 instead of dividing by zero.
+    Args:
+      sum_tp_iou: dict, summed true positive intersection-over-union (IoU) for
+        each class, keyed by class_id.
+      sum_num_tp: the total number of true positives for each class, keyed by
+        class_id.
+      sum_num_fp: the total number of false positives for each class, keyed by
+        class_id.
+      sum_num_fn: the total number of false negatives for each class, keyed by
+        class_id.
+    Returns:
+      mask_metrics: a dictionary containing averaged metrics over all classes,
+        and per-category metrics (valid classes only) if required.
+    """
+    mask_metrics = {}
+    sum_pq = 0
+    sum_sq = 0
+    sum_rq = 0
+    num_valid_classes = 0
+    for category in self._categories:
+      class_id = category['id']
+      (panoptic_quality, segmentation_quality,
+       recognition_quality) = self._compute_panoptic_metrics_single_class(
+           sum_tp_iou[class_id], sum_num_tp[class_id], sum_num_fp[class_id],
+           sum_num_fn[class_id])
+      # None marks a class without any groundtruth or prediction; skip it.
+      if panoptic_quality is not None:
+        sum_pq += panoptic_quality
+        sum_sq += segmentation_quality
+        sum_rq += recognition_quality
+        num_valid_classes += 1
+        if self._include_metrics_per_category:
+          mask_metrics['PanopticQuality@%.2fIOU_ByCategory/%s' %
+                       (self._iou_threshold,
+                        category['name'])] = panoptic_quality
+    if num_valid_classes > 0:
+      mean_pq = sum_pq / num_valid_classes
+      mean_sq = sum_sq / num_valid_classes
+      mean_rq = sum_rq / num_valid_classes
+    else:
+      # Nothing to average over (no groundtruth and no predictions at all).
+      mean_pq = mean_sq = mean_rq = 0.0
+    mask_metrics['PanopticQuality@%.2fIOU' % self._iou_threshold] = mean_pq
+    mask_metrics['SegmentationQuality@%.2fIOU' % self._iou_threshold] = mean_sq
+    mask_metrics['RecognitionQuality@%.2fIOU' % self._iou_threshold] = mean_rq
+    mask_metrics['NumValidClasses'] = num_valid_classes
+    mask_metrics['NumTotalClasses'] = len(self._categories)
+    return mask_metrics
+  def _compute_panoptic_metrics_single_class(self, sum_tp_iou, num_tp, num_fp,
+                                             num_fn):
+    """Derives panoptic/segmentation/recognition quality for one class.
+    More computation details in https://arxiv.org/pdf/1801.00868.pdf.
+    Args:
+      sum_tp_iou: summed true positive intersection-over-union (IoU) for a
+        specific class.
+      num_tp: the total number of true positives for a specific class.
+      num_fp: the total number of false positives for a specific class.
+      num_fn: the total number of false negatives for a specific class.
+    Returns:
+      panoptic_quality: sum_tp_iou / (num_tp + 0.5*num_fp + 0.5*num_fn).
+      segmentation_quality: sum_tp_iou / num_tp, or 0 without true positives.
+      recognition_quality: num_tp / (num_tp + 0.5*num_fp + 0.5*num_fn).
+      All three are None when the class has no groundtruth and no prediction.
+    """
+    denominator = num_tp + 0.5 * num_fp + 0.5 * num_fn
+    # The metrics are undefined without at least one GT or one prediction.
+    if not denominator > 0:
+      return None, None, None
+    recognition_quality = num_tp / denominator
+    # Without any TP for this category the segmentation quality is 0.
+    segmentation_quality = sum_tp_iou / num_tp if num_tp > 0 else 0
+    return (segmentation_quality * recognition_quality, segmentation_quality,
+            recognition_quality)
+  def _separate_normal_and_crowd_labels(self, crowd_gt_indices,
+                                        groundtruth_masks, groundtruth_classes):
+    """Separate normal and crowd groundtruth class_labels and masks.
+    Args:
+      crowd_gt_indices: None or array of shape [num_groundtruths]. If None, all
+        groundtruths are treated as normal ones.
+      groundtruth_masks: array of shape [num_groundtruths, height, width].
+      groundtruth_classes: array of shape [num_groundtruths].
+    Returns:
+      normal_gt_masks: array of shape [num_normal_groundtruths, height, width].
+      normal_gt_classes: array of shape [num_normal_groundtruths].
+      crowd_gt_masks: array of shape [num_crowd_groundtruths, height, width].
+      crowd_gt_classes: array of shape [num_crowd_groundtruths].
+    Raises:
+      ValueError: if the number of groundtruth classes or the number of
+        crowd_gt_indices entries doesn't match the number of groundtruth masks.
+    """
+    if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
+      raise ValueError(
+          "The number of masks doesn't match the number of labels.")
+    if crowd_gt_indices is None:
+      # All gts are treated as normal. The flags must hold one entry per
+      # groundtruth ([num_groundtruths]); building them from the 3-D mask shape
+      # would break the boolean indexing below.
+      crowd_gt_indices = np.zeros(groundtruth_classes.shape, dtype=bool)
+    else:
+      if groundtruth_masks.shape[0] != crowd_gt_indices.shape[0]:
+        raise ValueError(
+            "The number of masks doesn't match the number of is_crowd labels.")
+      # The builtin bool replaces the np.bool alias, which recent NumPy
+      # releases no longer provide.
+      crowd_gt_indices = crowd_gt_indices.astype(bool)
+    normal_gt_indices = np.logical_not(crowd_gt_indices)
+    if normal_gt_indices.size:
+      normal_gt_masks = groundtruth_masks[normal_gt_indices, :, :]
+      normal_gt_classes = groundtruth_classes[normal_gt_indices]
+      crowd_gt_masks = groundtruth_masks[crowd_gt_indices, :, :]
+      crowd_gt_classes = groundtruth_classes[crowd_gt_indices]
+    else:
+      # No groundtruths available, groundtruth_masks.shape = (0, h, w)
+      normal_gt_masks = groundtruth_masks
+      normal_gt_classes = groundtruth_classes
+      crowd_gt_masks = groundtruth_masks
+      crowd_gt_classes = groundtruth_classes
+    return normal_gt_masks, normal_gt_classes, crowd_gt_masks, crowd_gt_classes
+  def _match_predictions_to_groundtruths(self,
+                                         predicted_masks,
+                                         predicted_classes,
+                                         groundtruth_masks,
+                                         groundtruth_classes,
+                                         matching_threshold,
+                                         is_crowd=False,
+                                         with_replacement=False):
+    """Match the predicted masks to groundtruths.
+    Predictions are processed in order; each one is matched to the same-class
+    groundtruth with the highest overlap that reaches the threshold.
+    Args:
+      predicted_masks: array of shape [num_predictions, height, width].
+      predicted_classes: array of shape [num_predictions].
+      groundtruth_masks: array of shape [num_groundtruths, height, width].
+      groundtruth_classes: array of shape [num_groundtruths].
+      matching_threshold: if the overlap between a prediction and a groundtruth
+        is at least this threshold (and positive), the prediction can be
+        matched to that groundtruth.
+      is_crowd: whether the groundtruths are crowd annotation or not. If True,
+        use intersection over area (IoA) as the overlapping metric; otherwise
+        use intersection over union (IoU).
+      with_replacement: whether a groundtruth can be matched to multiple
+        predictions. By default, only 1-1 matching is allowed, as needed for
+        normal groundtruths; for crowd groundtruths, 1-to-many must be allowed.
+    Returns:
+      best_overlaps: array of shape [num_predictions]. Values representing the
+        IoU or IoA with best matched groundtruth, 0 for unmatched predictions.
+      pred_matched: array of shape [num_predictions]. Boolean value representing
+        whether the ith prediction is matched to a groundtruth.
+      gt_matched: array of shape [num_groundtruth]. Boolean value representing
+        whether the ith groundtruth is matched to a prediction.
+    Raises:
+      ValueError: if the shape of groundtruth/predicted masks doesn't match
+        groundtruth/predicted classes.
+    """
+    if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
+      raise ValueError(
+          "The number of GT masks doesn't match the number of labels.")
+    if predicted_masks.shape[0] != predicted_classes.shape[0]:
+      raise ValueError(
+          "The number of predicted masks doesn't match the number of labels.")
+    # The builtin bool replaces the np.bool alias, which recent NumPy releases
+    # no longer provide.
+    gt_matched = np.zeros(groundtruth_classes.shape, dtype=bool)
+    pred_matched = np.zeros(predicted_classes.shape, dtype=bool)
+    best_overlaps = np.zeros(predicted_classes.shape)
+    for pid in range(predicted_classes.shape[0]):
+      best_overlap = 0
+      matched_gt_id = -1
+      for gid in range(groundtruth_classes.shape[0]):
+        if predicted_classes[pid] == groundtruth_classes[gid]:
+          # In 1-1 matching a groundtruth taken by an earlier prediction is no
+          # longer available.
+          if (not with_replacement) and gt_matched[gid]:
+            continue
+          # Slices of length one keep the leading axis, so the helpers return
+          # a 1x1 matrix whose single entry is the pairwise overlap.
+          if not is_crowd:
+            overlap = np_mask_ops.iou(predicted_masks[pid:pid + 1],
+                                      groundtruth_masks[gid:gid + 1])[0, 0]
+          else:
+            overlap = np_mask_ops.ioa(groundtruth_masks[gid:gid + 1],
+                                      predicted_masks[pid:pid + 1])[0, 0]
+          if overlap >= matching_threshold and overlap > best_overlap:
+            matched_gt_id = gid
+            best_overlap = overlap
+      if matched_gt_id >= 0:
+        gt_matched[matched_gt_id] = True
+        pred_matched[pid] = True
+        best_overlaps[pid] = best_overlap
+    return best_overlaps, pred_matched, gt_matched
+  def _unpack_evaluation_dictionary_items(self, eval_dict):
+    """Unpack items from the evaluation dictionary.
+    Reads the tensors needed by the update op from `eval_dict`, fills in
+    defaults for the optional ones, and adds a leading batch dimension when
+    the inputs describe a single, unbatched image.
+    Args:
+      eval_dict: dictionary of tensors keyed by InputDataFields and
+        DetectionResultFields names.
+    Returns:
+      A tuple (image_id, groundtruth_classes, groundtruth_instance_masks,
+      groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+      detection_masks, num_det_masks_per_image).
+    """
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    image_id = eval_dict[input_data_fields.key]
+    groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
+    groundtruth_instance_masks = eval_dict[
+        input_data_fields.groundtruth_instance_masks]
+    groundtruth_is_crowd = eval_dict.get(input_data_fields.groundtruth_is_crowd,
+                                         None)
+    # The per-image mask counts are read from the box-count / detection-count
+    # fields; both are optional.
+    num_gt_masks_per_image = eval_dict.get(
+        input_data_fields.num_groundtruth_boxes, None)
+    detection_classes = eval_dict[detection_fields.detection_classes]
+    detection_masks = eval_dict[detection_fields.detection_masks]
+    num_det_masks_per_image = eval_dict.get(detection_fields.num_detections,
+                                            None)
+    # Without is_crowd information no groundtruth is flagged as crowd.
+    if groundtruth_is_crowd is None:
+      groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
+    # An empty static shape means image_id is a scalar, i.e. unbatched input.
+    if not image_id.shape.as_list():
+      # Apply a batch dimension to all tensors.
+      image_id = tf.expand_dims(image_id, 0)
+      groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
+      groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
+      groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
+      detection_classes = tf.expand_dims(detection_classes, 0)
+      detection_masks = tf.expand_dims(detection_masks, 0)
+      # Missing counts default to the size of dimension 1 of the (now batched)
+      # class tensors; the [1:2] slice keeps the result a length-1 vector.
+      if num_gt_masks_per_image is None:
+        num_gt_masks_per_image = tf.shape(groundtruth_classes)[1:2]
+      else:
+        num_gt_masks_per_image = tf.expand_dims(num_gt_masks_per_image, 0)
+      if num_det_masks_per_image is None:
+        num_det_masks_per_image = tf.shape(detection_classes)[1:2]
+      else:
+        num_det_masks_per_image = tf.expand_dims(num_det_masks_per_image, 0)
+    else:
+      # Batched input: a missing count defaults to the size of dimension 1,
+      # repeated once per batch element.
+      if num_gt_masks_per_image is None:
+        num_gt_masks_per_image = tf.tile(
+            tf.shape(groundtruth_classes)[1:2],
+            multiples=tf.shape(groundtruth_classes)[0:1])
+      if num_det_masks_per_image is None:
+        num_det_masks_per_image = tf.tile(
+            tf.shape(detection_classes)[1:2],
+            multiples=tf.shape(detection_classes)[0:1])
+    return (image_id, groundtruth_classes, groundtruth_instance_masks,
+            groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
+            detection_masks, num_det_masks_per_image)
--- a/research/object_detection/metrics/coco_evaluation_test.py
+++ b/research/object_detection/metrics/coco_evaluation_test.py
@@ -18,10 +18,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf
 from object_detection.core import standard_fields
 from object_detection.metrics import coco_evaluation
+from object_detection.utils import tf_version
 def _get_categories_list():
@@ -250,6 +252,7 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
          })
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
 class CocoEvaluationPyFuncTest(tf.test.TestCase):
  def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
@@ -926,6 +929,7 @@ class CocoKeypointEvaluationTest(tf.test.TestCase):
                           -1.0)
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
 class CocoKeypointEvaluationPyFuncTest(tf.test.TestCase):
  def testGetOneMAPWithMatchingKeypoints(self):
@@ -1438,6 +1442,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._detection_masks_list)
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
 class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
  def testAddEvalDict(self):
@@ -1716,5 +1721,221 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._detection_masks_list)
+def _get_panoptic_test_data():
+  # image1 contains 3 people in gt, (2 normal annotation and 1 "is_crowd"
+  # annotation), and 3 people in prediction.
+  gt_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
+  result_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
+  gt_masks1[0, 10:20, 20:30] = 1
+  result_masks1[0, 10:18, 20:30] = 1
+  gt_masks1[1, 25:30, 25:35] = 1
+  result_masks1[1, 18:25, 25:30] = 1
+  gt_masks1[2, 40:50, 40:50] = 1
+  result_masks1[2, 47:50, 47:50] = 1
+  gt_class1 = np.array([1, 1, 1])
+  gt_is_crowd1 = np.array([0, 0, 1])
+  result_class1 = np.array([1, 1, 1])
+  # image2 contains 1 dog and 1 cat in gt, while 1 person and 1 dog in
+  # prediction.
+  gt_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
+  result_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
+  gt_masks2[0, 5:15, 20:35] = 1
+  gt_masks2[1, 20:30, 0:10] = 1
+  result_masks2[0, 20:25, 10:15] = 1
+  result_masks2[1, 6:15, 15:35] = 1
+  gt_class2 = np.array([2, 3])
+  gt_is_crowd2 = np.array([0, 0])
+  result_class2 = np.array([1, 2])
+  gt_class = [gt_class1, gt_class2]
+  gt_masks = [gt_masks1, gt_masks2]
+  gt_is_crowd = [gt_is_crowd1, gt_is_crowd2]
+  result_class = [result_class1, result_class2]
+  result_masks = [result_masks1, result_masks2]
+  return gt_class, gt_masks, gt_is_crowd, result_class, result_masks
+class CocoPanopticEvaluationTest(tf.test.TestCase):
+  def test_panoptic_quality(self):
+    pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
+        _get_categories_list(), include_metrics_per_category=True)
+    (gt_class, gt_masks, gt_is_crowd, result_class,
+     result_masks) = _get_panoptic_test_data()
+    for i in range(2):
+      pq_evaluator.add_single_ground_truth_image_info(
+          image_id='image%d' % i,
+          groundtruth_dict={
+              standard_fields.InputDataFields.groundtruth_classes:
+                  gt_class[i],
+              standard_fields.InputDataFields.groundtruth_instance_masks:
+                  gt_masks[i],
+              standard_fields.InputDataFields.groundtruth_is_crowd:
+                  gt_is_crowd[i]
+          })
+      pq_evaluator.add_single_detected_image_info(
+          image_id='image%d' % i,
+          detections_dict={
+              standard_fields.DetectionResultFields.detection_classes:
+                  result_class[i],
+              standard_fields.DetectionResultFields.detection_masks:
+                  result_masks[i]
+          })
+    metrics = pq_evaluator.evaluate()
+    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/person'],
+                           0.32)
+    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/dog'],
+                           135.0 / 195)
+    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/cat'], 0)
+    self.assertAlmostEqual(metrics['SegmentationQuality@0.50IOU'],
+                           (0.8 + 135.0 / 195) / 3)
+    self.assertAlmostEqual(metrics['RecognitionQuality@0.50IOU'], (0.4 + 1) / 3)
+    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
+                           (0.32 + 135.0 / 195) / 3)
+    self.assertEqual(metrics['NumValidClasses'], 3)
+    self.assertEqual(metrics['NumTotalClasses'], 3)
+@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
+class CocoPanopticEvaluationPyFuncTest(tf.test.TestCase):
+  def testPanopticQualityNoBatch(self):
+    pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
+        _get_categories_list(), include_metrics_per_category=True)
+    image_id = tf.placeholder(tf.string, shape=())
+    groundtruth_classes = tf.placeholder(tf.int32, shape=(None))
+    groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
+    groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(None))
+    detection_classes = tf.placeholder(tf.int32, shape=(None))
+    detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key: image_id,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+        input_data_fields.groundtruth_instance_masks: groundtruth_masks,
+        input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
+        detection_fields.detection_classes: detection_classes,
+        detection_fields.detection_masks: detection_masks,
+    }
+    eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
+    _, update_op = eval_metric_ops['PanopticQuality@0.50IOU']
+    (gt_class, gt_masks, gt_is_crowd, result_class,
+     result_masks) = _get_panoptic_test_data()
+    with self.test_session() as sess:
+      for i in range(2):
+        sess.run(
+            update_op,
+            feed_dict={
+                image_id: 'image%d' % i,
+                groundtruth_classes: gt_class[i],
+                groundtruth_masks: gt_masks[i],
+                groundtruth_is_crowd: gt_is_crowd[i],
+                detection_classes: result_class[i],
+                detection_masks: result_masks[i]
+            })
+    metrics = {}
+    for key, (value_op, _) in eval_metric_ops.items():
+      metrics[key] = value_op
+    metrics = sess.run(metrics)
+    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
+                           (0.32 + 135.0 / 195) / 3)
+  def testPanopticQualityBatched(self):
+    pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
+        _get_categories_list(), include_metrics_per_category=True)
+    batch_size = 2
+    image_id = tf.placeholder(tf.string, shape=(batch_size))
+    groundtruth_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
+    groundtruth_masks = tf.placeholder(
+        tf.uint8, shape=(batch_size, None, None, None))
+    groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(batch_size, None))
+    detection_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
+    detection_masks = tf.placeholder(
+        tf.uint8, shape=(batch_size, None, None, None))
+    num_gt_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
+    num_det_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key: image_id,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+        input_data_fields.groundtruth_instance_masks: groundtruth_masks,
+        input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
+        input_data_fields.num_groundtruth_boxes: num_gt_masks_per_image,
+        detection_fields.detection_classes: detection_classes,
+        detection_fields.detection_masks: detection_masks,
+        detection_fields.num_detections: num_det_masks_per_image,
+    }
+    eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
+    _, update_op = eval_metric_ops['PanopticQuality@0.50IOU']
+    (gt_class, gt_masks, gt_is_crowd, result_class,
+     result_masks) = _get_panoptic_test_data()
+    with self.test_session() as sess:
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id: ['image0', 'image1'],
+              groundtruth_classes:
+                  np.stack([
+                      gt_class[0],
+                      np.pad(gt_class[1], (0, 1), mode='constant')
+                  ],
+                           axis=0),
+              groundtruth_masks:
+                  np.stack([
+                      np.pad(
+                          gt_masks[0], ((0, 0), (0, 10), (0, 10)),
+                          mode='constant'),
+                      np.pad(
+                          gt_masks[1], ((0, 1), (0, 30), (0, 20)),
+                          mode='constant'),
+                  ],
+                           axis=0),
+              groundtruth_is_crowd:
+                  np.stack([
+                      gt_is_crowd[0],
+                      np.pad(gt_is_crowd[1], (0, 1), mode='constant')
+                  ],
+                           axis=0),
+              num_gt_masks_per_image: np.array([3, 2]),
+              detection_classes:
+                  np.stack([
+                      result_class[0],
+                      np.pad(result_class[1], (0, 1), mode='constant')
+                  ],
+                           axis=0),
+              detection_masks:
+                  np.stack([
+                      np.pad(
+                          result_masks[0], ((0, 0), (0, 10), (0, 10)),
+                          mode='constant'),
+                      np.pad(
+                          result_masks[1], ((0, 1), (0, 30), (0, 20)),
+                          mode='constant'),
+                  ],
+                           axis=0),
+              num_det_masks_per_image: np.array([3, 2]),
+          })
+    metrics = {}
+    for key, (value_op, _) in eval_metric_ops.items():
+      metrics[key] = value_op
+    metrics = sess.run(metrics)
+    self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
+                           (0.32 + 135.0 / 195) / 3)
 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/metrics/coco_tools.py
+++ b/research/object_detection/metrics/coco_tools.py
@@ -52,6 +52,7 @@ from pycocotools import coco
 from pycocotools import cocoeval
 from pycocotools import mask
+import six
 from six.moves import range
 from six.moves import zip
 import tensorflow.compat.v1 as tf
@@ -353,7 +354,9 @@ def _RleCompress(masks):
  Returns:
    A pycocotools Run-length encoding of the mask.
  """
-  return mask.encode(np.asfortranarray(masks))
+  rle = mask.encode(np.asfortranarray(masks))
+  rle['counts'] = six.ensure_str(rle['counts'])
+  return rle
 def ExportSingleImageGroundtruthToCoco(image_id,

--- a/research/object_detection/metrics/offline_eval_map_corloc.py
+++ b/research/object_detection/metrics/offline_eval_map_corloc.py
@@ -36,8 +36,8 @@ import os
 import re
 import tensorflow.compat.v1 as tf
+from object_detection import eval_util
 from object_detection.core import standard_fields
-from object_detection.legacy import evaluator
 from object_detection.metrics import tf_example_parser
 from object_detection.utils import config_util
 from object_detection.utils import label_map_util
@@ -94,7 +94,7 @@ def read_data_and_evaluate(input_config, eval_config):
    categories = label_map_util.create_categories_from_labelmap(
        input_config.label_map_path)
-    object_detection_evaluators = evaluator.get_evaluators(
+    object_detection_evaluators = eval_util.get_evaluators(
        eval_config, categories)
    # Support a single evaluator
    object_detection_evaluator = object_detection_evaluators[0]

--- a/research/object_detection/model_lib_test.py
+++ b/research/object_detection/model_lib_test.py
@@ -20,19 +20,17 @@ from __future__ import print_function
 import functools
 import os
+import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf
-from tensorflow.contrib.tpu.python.tpu import tpu_config
-from tensorflow.contrib.tpu.python.tpu import tpu_estimator
 from object_detection import inputs
 from object_detection import model_hparams
 from object_detection import model_lib
 from object_detection.builders import model_builder
 from object_detection.core import standard_fields as fields
 from object_detection.utils import config_util
+from object_detection.utils import tf_version
 # Model for test. Options are:
@@ -122,6 +120,7 @@ def _make_initializable_iterator(dataset):
  return iterator
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class ModelLibTest(tf.test.TestCase):
  @classmethod
@@ -337,8 +336,7 @@ class ModelLibTest(tf.test.TestCase):
  def test_create_tpu_estimator_and_inputs(self):
    """Tests that number of train/eval defaults to config values."""
+    run_config = tf.estimator.tpu.RunConfig()
-    run_config = tpu_config.RunConfig()
    hparams = model_hparams.create_hparams(
        hparams_overrides='load_pretrained=false')
    pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
@@ -352,7 +350,7 @@ class ModelLibTest(tf.test.TestCase):
    estimator = train_and_eval_dict['estimator']
    train_steps = train_and_eval_dict['train_steps']
-    self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
+    self.assertIsInstance(estimator, tf.estimator.tpu.TPUEstimator)
    self.assertEqual(20, train_steps)
  def test_create_train_and_eval_specs(self):
@@ -406,6 +404,7 @@ class ModelLibTest(tf.test.TestCase):
    self.assertEqual(None, experiment.eval_steps)
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class UnbatchTensorsTest(tf.test.TestCase):
  def test_unbatch_without_unpadding(self):

--- a/research/object_detection/model_lib_v2_test.py
+++ b/research/object_detection/model_lib_v2_test.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 import os
 import tempfile
+import unittest
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
@@ -32,6 +32,7 @@ from object_detection.builders import model_builder
 from object_detection.core import model
 from object_detection.protos import train_pb2
 from object_detection.utils import config_util
+from object_detection.utils import tf_version
 if six.PY2:
  import mock  # pylint: disable=g-importing-member,g-import-not-at-top
@@ -72,6 +73,7 @@ def _get_config_kwarg_overrides():
  }
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class ModelLibTest(tf.test.TestCase):
  @classmethod
@@ -139,6 +141,7 @@ class SimpleModel(model.DetectionModel):
    return []
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class ModelCheckpointTest(tf.test.TestCase):
  """Test for model checkpoint related functionality."""
@@ -171,6 +174,7 @@ class IncompatibleModel(SimpleModel):
    return {'weight': self.weight}
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class CheckpointV2Test(tf.test.TestCase):
  def setUp(self):

--- a/research/object_detection/model_lib_v2.py
+++ b/research/object_detection/model_lib_v2.py
@@ -358,7 +358,7 @@ def load_fine_tune_checkpoint(
    ckpt.restore(checkpoint_path).assert_existing_objects_matched()
-def _get_filepath(strategy, filepath):
+def get_filepath(strategy, filepath):
  """Get appropriate filepath for worker.
  Args:
@@ -377,7 +377,7 @@ def _get_filepath(strategy, filepath):
    return os.path.join(filepath, 'temp_worker_{:03d}'.format(task_id))
-def _clean_temporary_directories(strategy, filepath):
+def clean_temporary_directories(strategy, filepath):
  """Temporary directory clean up for MultiWorker Mirrored Strategy.
  This is needed for all non-chief workers.
@@ -539,8 +539,8 @@ def train_loop(
  ## Train the model
  # Get the appropriate filepath (temporary or not) based on whether the worker
  # is the chief.
-  summary_writer_filepath = _get_filepath(strategy,
+  summary_writer_filepath = get_filepath(strategy,
-                                          os.path.join(model_dir, 'train'))
+                                         os.path.join(model_dir, 'train'))
  summary_writer = tf.compat.v2.summary.create_file_writer(
      summary_writer_filepath)
@@ -567,7 +567,7 @@ def train_loop(
        ckpt = tf.compat.v2.train.Checkpoint(
            step=global_step, model=detection_model, optimizer=optimizer)
-        manager_dir = _get_filepath(strategy, model_dir)
+        manager_dir = get_filepath(strategy, model_dir)
        if not strategy.extended.should_checkpoint:
          checkpoint_max_to_keep = 1
        manager = tf.compat.v2.train.CheckpointManager(
@@ -615,6 +615,10 @@ def train_loop(
          return _sample_and_train(strategy, train_step_fn, data_iterator)
        train_input_iter = iter(train_input)
+        if int(global_step.value()) == 0:
+          manager.save()
        checkpointed_step = int(global_step.value())
        logged_step = global_step.value()
@@ -646,8 +650,8 @@ def train_loop(
  # Remove the checkpoint directories of the non-chief workers that
  # MultiWorkerMirroredStrategy forces us to save during sync distributed
  # training.
-  _clean_temporary_directories(strategy, manager_dir)
+  clean_temporary_directories(strategy, manager_dir)
-  _clean_temporary_directories(strategy, summary_writer_filepath)
+  clean_temporary_directories(strategy, summary_writer_filepath)
 def eager_eval_loop(

--- a/research/object_detection/model_main_tf2.py
+++ b/research/object_detection/model_main_tf2.py
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Creates and runs TF2 object detection models.
+##################################
+NOTE: This module has not been fully tested; please bear with us while we iron
+out the kinks.
+##################################
+When a TPU device is available, this binary uses TPUStrategy. Otherwise, it uses
+GPUs with MirroredStrategy/MultiWorkerMirroredStrategy.
+For local training/evaluation run:
+PIPELINE_CONFIG_PATH=path/to/pipeline.config
+MODEL_DIR=/tmp/model_outputs
+NUM_TRAIN_STEPS=10000
+SAMPLE_1_OF_N_EVAL_EXAMPLES=1
+python model_main_tf2.py \
+  --model_dir=$MODEL_DIR --num_train_steps=$NUM_TRAIN_STEPS \
+  --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
+  --pipeline_config_path=$PIPELINE_CONFIG_PATH \
+  --alsologtostderr
+"""
+from absl import flags
+import tensorflow.compat.v2 as tf
+from object_detection import model_hparams
+from object_detection import model_lib_v2
+flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
+                    'file.')
+flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
+flags.DEFINE_bool('eval_on_train_data', False, 'Enable evaluating on train '
+                  'data (only supported in distributed training).')
+flags.DEFINE_integer('sample_1_of_n_eval_examples', None, 'Will sample one of '
+                     'every n eval input examples, where n is provided.')
+flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
+                     'one of every n train input examples for evaluation, '
+                     'where n is provided. This is only used if '
+                     '`eval_training_data` is True.')
+flags.DEFINE_string(
+    'hparams_overrides', None, 'Hyperparameter overrides, '
+    'represented as a string containing comma-separated '
+    'hparam_name=value pairs.')
+flags.DEFINE_string(
+    'model_dir', None, 'Path to output model directory '
+                       'where event and checkpoint files will be written.')
+flags.DEFINE_string(
+    'checkpoint_dir', None, 'Path to directory holding a checkpoint.  If '
+    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
+    'writing resulting metrics to `model_dir`.')
+flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
+                     'evaluation checkpoint before exiting.')
+flags.DEFINE_integer(
+    'num_workers', 1, 'When num_workers > 1, training uses '
+    'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
+    'MirroredStrategy.')
+FLAGS = flags.FLAGS
+def main(unused_argv):
+  flags.mark_flag_as_required('model_dir')
+  flags.mark_flag_as_required('pipeline_config_path')
+  tf.config.set_soft_device_placement(True)
+  if FLAGS.checkpoint_dir:
+    model_lib_v2.eval_continuously(
+        hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
+        pipeline_config_path=FLAGS.pipeline_config_path,
+        model_dir=FLAGS.model_dir,
+        train_steps=FLAGS.num_train_steps,
+        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
+        sample_1_of_n_eval_on_train_examples=(
+            FLAGS.sample_1_of_n_eval_on_train_examples),
+        checkpoint_dir=FLAGS.checkpoint_dir,
+        wait_interval=300, timeout=FLAGS.eval_timeout)
+  else:
+    if tf.config.get_visible_devices('TPU'):
+      resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
+      tf.config.experimental_connect_to_cluster(resolver)
+      tf.tpu.experimental.initialize_tpu_system(resolver)
+      strategy = tf.distribute.experimental.TPUStrategy(resolver)
+    elif FLAGS.num_workers > 1:
+      strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+    else:
+      strategy = tf.compat.v2.distribute.MirroredStrategy()
+    with strategy.scope():
+      model_lib_v2.train_loop(
+          hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
+          pipeline_config_path=FLAGS.pipeline_config_path,
+          model_dir=FLAGS.model_dir,
+          train_steps=FLAGS.num_train_steps,
+          use_tpu=FLAGS.use_tpu)
+if __name__ == '__main__':
+  tf.app.run()
--- a/research/object_detection/models/center_net_hourglass_feature_extractor.py
+++ b/research/object_detection/models/center_net_hourglass_feature_extractor.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Hourglass[1] feature extractor for CenterNet[2] meta architecture.
+[1]: https://arxiv.org/abs/1603.06937
+[2]: https://arxiv.org/abs/1904.07850
+"""
+from object_detection.meta_architectures import center_net_meta_arch
+from object_detection.models.keras_models import hourglass_network
+class CenterNetHourglassFeatureExtractor(
+    center_net_meta_arch.CenterNetFeatureExtractor):
+  """The hourglass feature extractor for CenterNet.
+  This class is a thin wrapper around the HourglassFeatureExtractor class
+  along with some preprocessing methods inherited from the base class.
+  """
+  def __init__(self, hourglass_net, channel_means=(0., 0., 0.),
+               channel_stds=(1., 1., 1.), bgr_ordering=False):
+    """Intializes the feature extractor.
+    Args:
+      hourglass_net: The underlying hourglass network to use.
+      channel_means: A tuple of floats, denoting the mean of each channel
+        which will be subtracted from it.
+      channel_stds: A tuple of floats, denoting the standard deviation of each
+        channel. Each channel will be divided by its standard deviation value.
+      bgr_ordering: bool, if set will change the channel ordering to be in the
+        [blue, red, green] order.
+    """
+    super(CenterNetHourglassFeatureExtractor, self).__init__(
+        channel_means=channel_means, channel_stds=channel_stds,
+        bgr_ordering=bgr_ordering)
+    self._network = hourglass_net
+  def call(self, inputs):
+    return self._network(inputs)
+  @property
+  def out_stride(self):
+    """The stride in the output image of the network."""
+    return 4
+  @property
+  def num_feature_outputs(self):
+    """Ther number of feature outputs returned by the feature extractor."""
+    return self._network.num_hourglasses
+  def get_model(self):
+    return self._network
+def hourglass_104(channel_means, channel_stds, bgr_ordering):
+  """The Hourglass-104 backbone for CenterNet."""
+  network = hourglass_network.hourglass_104()
+  return CenterNetHourglassFeatureExtractor(
+      network, channel_means=channel_means, channel_stds=channel_stds,
+      bgr_ordering=bgr_ordering)
--- a/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py
+++ b/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing hourglass feature extractor for CenterNet."""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
+from object_detection.models import center_net_hourglass_feature_extractor as hourglass
+from object_detection.models.keras_models import hourglass_network
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetHourglassFeatureExtractorTest(test_case.TestCase):
+  def test_center_net_hourglass_feature_extractor(self):
+    net = hourglass_network.HourglassNetwork(
+        num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6],
+        channel_dims=[4, 6, 8, 10, 12, 14], num_hourglasses=2)
+    model = hourglass.CenterNetHourglassFeatureExtractor(net)
+    def graph_fn():
+      return model(tf.zeros((2, 64, 64, 3), dtype=np.float32))
+    outputs = self.execute(graph_fn, [])
+    self.assertEqual(outputs[0].shape, (2, 16, 16, 6))
+    self.assertEqual(outputs[1].shape, (2, 16, 16, 6))
+# Run the tests in this module when the file is executed directly.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/center_net_resnet_feature_extractor.py
+++ b/research/object_detection/models/center_net_resnet_feature_extractor.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resnetv2 based feature extractors for CenterNet[1] meta architecture.
+[1]: https://arxiv.org/abs/1904.07850
+"""
+import tensorflow.compat.v1 as tf
+from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
+class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor):
+  """Resnet v2 base feature extractor for the CenterNet model."""
+  def __init__(self, resnet_type, channel_means=(0., 0., 0.),
+               channel_stds=(1., 1., 1.), bgr_ordering=False):
+    """Initializes the feature extractor with a specific ResNet architecture.
+    Args:
+      resnet_type: A string specifying which kind of ResNet to use. Currently
+        only `resnet_v2_50` and `resnet_v2_101` are supported.
+      channel_means: A tuple of floats, denoting the mean of each channel
+        which will be subtracted from it.
+      channel_stds: A tuple of floats, denoting the standard deviation of each
+        channel. Each channel will be divided by its standard deviation value.
+      bgr_ordering: bool, if set will change the channel ordering to be in the
+        [blue, red, green] order.
+    """
+    super(CenterNetResnetFeatureExtractor, self).__init__(
+        channel_means=channel_means, channel_stds=channel_stds,
+        bgr_ordering=bgr_ordering)
+    if resnet_type == 'resnet_v2_101':
+      self._base_model = tf.keras.applications.ResNet101V2(weights=None)
+      output_layer = 'conv5_block3_out'
+    elif resnet_type == 'resnet_v2_50':
+      self._base_model = tf.keras.applications.ResNet50V2(weights=None)
+      output_layer = 'conv5_block3_out'
+    else:
+      raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
+    output_layer = self._base_model.get_layer(output_layer)
+    self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
+                                               outputs=output_layer.output)
+    resnet_output = self._resnet_model(self._base_model.input)
+    for num_filters in [256, 128, 64]:
+      # TODO(vighneshb) This section has a few differences from the paper
+      # Figure out how much of a performance impact they have.
+      # 1. We use a simple convolution instead of a deformable convolution
+      conv = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=3,
+                                    strides=1, padding='same')
+      resnet_output = conv(resnet_output)
+      resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
+      resnet_output = tf.keras.layers.ReLU()(resnet_output)
+      # 2. We use the default initialization for the convolution layers
+      # instead of initializing it to do bilinear upsampling.
+      conv_transpose = tf.keras.layers.Conv2DTranspose(filters=num_filters,
+                                                       kernel_size=3, strides=2,
+                                                       padding='same')
+      resnet_output = conv_transpose(resnet_output)
+      resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
+      resnet_output = tf.keras.layers.ReLU()(resnet_output)
+    self._feature_extractor_model = tf.keras.models.Model(
+        inputs=self._base_model.input, outputs=resnet_output)
+  def preprocess(self, resized_inputs):
+    """Preprocess input images for the ResNet model.
+    This scales images in the range [0, 255] to the range [-1, 1]
+    Args:
+      resized_inputs: a [batch, height, width, channels] float32 tensor.
+    Returns:
+      outputs: a [batch, height, width, channels] float32 tensor.
+    """
+    resized_inputs = super(CenterNetResnetFeatureExtractor, self).preprocess(
+        resized_inputs)
+    return tf.keras.applications.resnet_v2.preprocess_input(resized_inputs)
+  def load_feature_extractor_weights(self, path):
+    self._base_model.load_weights(path)
+  def get_base_model(self):
+    """Get base resnet model for inspection and testing."""
+    return self._base_model
+  def call(self, inputs):
+    """Returns image features extracted by the backbone.
+    Args:
+      inputs: An image tensor of shape [batch_size, input_height,
+        input_width, 3]
+    Returns:
+      features_list: A list of length 1 containing a tensor of shape
+        [batch_size, input_height // 4, input_width // 4, 64] containing
+        the features extracted by the ResNet.
+    """
+    return [self._feature_extractor_model(inputs)]
+  @property
+  def num_feature_outputs(self):
+    return 1
+  @property
+  def out_stride(self):
+    return 4
+def resnet_v2_101(channel_means, channel_stds, bgr_ordering):
+  """Builds the CenterNet feature extractor backed by ResNet v2 101.
+  Args:
+    channel_means: forwarded unchanged to the extractor constructor.
+    channel_stds: forwarded unchanged to the extractor constructor.
+    bgr_ordering: forwarded unchanged to the extractor constructor.
+  Returns:
+    A `CenterNetResnetFeatureExtractor` built with resnet_type
+    'resnet_v2_101'.
+  """
+  normalization_kwargs = dict(
+      channel_means=channel_means,
+      channel_stds=channel_stds,
+      bgr_ordering=bgr_ordering)
+  return CenterNetResnetFeatureExtractor(
+      resnet_type='resnet_v2_101', **normalization_kwargs)
+def resnet_v2_50(channel_means, channel_stds, bgr_ordering):
+  """Builds the CenterNet feature extractor backed by ResNet v2 50.
+  Args:
+    channel_means: forwarded unchanged to the extractor constructor.
+    channel_stds: forwarded unchanged to the extractor constructor.
+    bgr_ordering: forwarded unchanged to the extractor constructor.
+  Returns:
+    A `CenterNetResnetFeatureExtractor` built with resnet_type
+    'resnet_v2_50'.
+  """
+  normalization_kwargs = dict(
+      channel_means=channel_means,
+      channel_stds=channel_stds,
+      bgr_ordering=bgr_ordering)
+  return CenterNetResnetFeatureExtractor(
+      resnet_type='resnet_v2_50', **normalization_kwargs)
--- a/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py
+++ b/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing ResNet v2 models for the CenterNet meta architecture."""
+import unittest
+import numpy as np
+import tensorflow.compat.v1 as tf
+from object_detection.models import center_net_resnet_feature_extractor
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetResnetFeatureExtractorTest(test_case.TestCase):
+  """Output-shape checks for the CenterNet ResNet v2 feature extractors."""
+  def _run_on_zero_batch(self, resnet_type):
+    """Builds the extractor and returns its output for an all-zero batch."""
+    extractor = (
+        center_net_resnet_feature_extractor.CenterNetResnetFeatureExtractor(
+            resnet_type))
+    def graph_fn():
+      zeros = np.zeros((8, 224, 224, 3), dtype=np.float32)
+      return extractor(extractor.preprocess(zeros))
+    return self.execute(graph_fn, [])
+  def test_output_size(self):
+    """A batch of 224x224 images yields 56x56x64 features (ResNet v2 101)."""
+    features = self._run_on_zero_batch('resnet_v2_101')
+    self.assertEqual(features.shape, (8, 56, 56, 64))
+  def test_output_size_resnet50(self):
+    """A batch of 224x224 images yields 56x56x64 features (ResNet v2 50)."""
+    features = self._run_on_zero_batch('resnet_v2_50')
+    self.assertEqual(features.shape, (8, 56, 56, 64))
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py
+++ b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resnetv1 FPN [1] based feature extractors for CenterNet[2] meta architecture.
+[1]: https://arxiv.org/abs/1612.03144.
+[2]: https://arxiv.org/abs/1904.07850.
+"""
+import tensorflow.compat.v1 as tf
+from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
+# Names of the base-model layers whose outputs are tapped for the FPN, keyed
+# by ResNet type. Order matters: the feature extractor treats the last entry
+# as the top of the network and merges the earlier entries in reverse order
+# while upsampling.
+_RESNET_MODEL_OUTPUT_LAYERS = {
+    'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
+                     'conv4_block6_out', 'conv5_block3_out'],
+    'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
+                      'conv4_block23_out', 'conv5_block3_out'],
+}
+class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
+  """Resnet v1 FPN base feature extractor for the CenterNet model.
+  This feature extractor uses residual skip connections and nearest neighbor
+  upsampling to produce an output feature map of stride 4, which has precise
+  localization information along with strong semantic information from the top
+  of the net. This design does not exactly follow the original FPN design,
+  specifically:
+  - Since only one output map is necessary for heatmap prediction (stride 4
+    output), the top-down feature maps can have different numbers of channels.
+    Specifically, the top down feature maps have the following sizes:
+    [h/4, w/4, 64], [h/8, w/8, 128], [h/16, w/16, 256], [h/32, w/32, 256].
+  - No additional coarse features are used after conv5_x.
+  """
+  def __init__(self, resnet_type, channel_means=(0., 0., 0.),
+               channel_stds=(1., 1., 1.), bgr_ordering=False):
+    """Initializes the feature extractor with a specific ResNet architecture.
+    Args:
+      resnet_type: A string specifying which kind of ResNet to use. Currently
+        only `resnet_v1_50` and `resnet_v1_101` are supported.
+      channel_means: A tuple of floats, denoting the mean of each channel
+        which will be subtracted from it.
+      channel_stds: A tuple of floats, denoting the standard deviation of each
+        channel. Each channel will be divided by its standard deviation value.
+      bgr_ordering: bool, if set will change the channel ordering to be in the
+        [blue, green, red] order.
+    Raises:
+      ValueError: if `resnet_type` is not one of the supported names.
+    """
+    super(CenterNetResnetV1FpnFeatureExtractor, self).__init__(
+        channel_means=channel_means, channel_stds=channel_stds,
+        bgr_ordering=bgr_ordering)
+    # The base model is built without pretrained weights; they can be restored
+    # afterwards through `load_feature_extractor_weights`.
+    if resnet_type == 'resnet_v1_50':
+      self._base_model = tf.keras.applications.ResNet50(weights=None)
+    elif resnet_type == 'resnet_v1_101':
+      self._base_model = tf.keras.applications.ResNet101(weights=None)
+    else:
+      raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
+    # Tap the bottom-up pathway at the configured layers (finest first).
+    output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
+    outputs = [self._base_model.get_layer(output_layer_name).output
+               for output_layer_name in output_layers]
+    self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
+                                               outputs=outputs)
+    resnet_outputs = self._resnet_model(self._base_model.input)
+    # Construct the top-down feature maps.
+    top_layer = resnet_outputs[-1]
+    residual_op = tf.keras.layers.Conv2D(filters=256, kernel_size=1,
+                                         strides=1, padding='same')
+    top_down = residual_op(top_layer)
+    num_filters_list = [256, 128, 64]
+    num_levels = len(num_filters_list)
+    for i, num_filters in enumerate(num_filters_list):
+      # Walk the remaining bottom-up outputs from coarsest to finest.
+      level_ind = num_levels - 1 - i
+      # Upsample.
+      upsample_op = tf.keras.layers.UpSampling2D(2, interpolation='nearest')
+      top_down = upsample_op(top_down)
+      # Residual (skip-connection) from bottom-up pathway.
+      residual_op = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=1,
+                                           strides=1, padding='same')
+      residual = residual_op(resnet_outputs[level_ind])
+      # Merge.
+      top_down = top_down + residual
+      # The 3x3 conv already emits the channel count of the next (finer) level
+      # so the following merge is shape-compatible; the last level emits the
+      # 64-channel output.
+      next_num_filters = (
+          num_filters_list[i + 1] if i + 1 < num_levels else 64)
+      conv = tf.keras.layers.Conv2D(filters=next_num_filters,
+                                    kernel_size=3, strides=1, padding='same')
+      top_down = conv(top_down)
+      top_down = tf.keras.layers.BatchNormalization()(top_down)
+      top_down = tf.keras.layers.ReLU()(top_down)
+    self._feature_extractor_model = tf.keras.models.Model(
+        inputs=self._base_model.input, outputs=top_down)
+  def preprocess(self, resized_inputs):
+    """Preprocess input images for the ResNet v1 model.
+    The parent class's `preprocess` is applied first and its result is then
+    passed through `tf.keras.applications.resnet.preprocess_input`.
+    NOTE: per the Keras documentation, that function converts images from RGB
+    to BGR and zero-centers each channel with respect to the ImageNet dataset,
+    without scaling. It does not map images to the range [-1, 1].
+    Args:
+      resized_inputs: a [batch, height, width, channels] float32 tensor.
+    Returns:
+      outputs: a [batch, height, width, channels] float32 tensor.
+    """
+    resized_inputs = super(
+        CenterNetResnetV1FpnFeatureExtractor, self).preprocess(resized_inputs)
+    return tf.keras.applications.resnet.preprocess_input(resized_inputs)
+  def load_feature_extractor_weights(self, path):
+    """Loads weights from `path` into the base ResNet model.
+    The load is issued on `self._base_model` only.
+    Args:
+      path: forwarded unchanged to the base model's `load_weights`.
+    """
+    self._base_model.load_weights(path)
+  def get_base_model(self):
+    """Get base resnet model for inspection and testing."""
+    return self._base_model
+  def call(self, inputs):
+    """Returns image features extracted by the backbone.
+    Args:
+      inputs: An image tensor of shape [batch_size, input_height,
+        input_width, 3]
+    Returns:
+      features_list: A list of length 1 containing a tensor of shape
+        [batch_size, input_height // 4, input_width // 4, 64] containing
+        the features extracted by the ResNet.
+    """
+    return [self._feature_extractor_model(inputs)]
+  @property
+  def num_feature_outputs(self):
+    """Number of feature maps in the list returned by `call` (always 1)."""
+    return 1
+  @property
+  def out_stride(self):
+    """Stride of the output feature map relative to the input image."""
+    return 4
+def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering):
+  """Builds the CenterNet FPN feature extractor backed by ResNet v1 101.
+  Args:
+    channel_means: forwarded unchanged to the extractor constructor.
+    channel_stds: forwarded unchanged to the extractor constructor.
+    bgr_ordering: forwarded unchanged to the extractor constructor.
+  Returns:
+    A `CenterNetResnetV1FpnFeatureExtractor` built with resnet_type
+    'resnet_v1_101'.
+  """
+  normalization_kwargs = dict(
+      channel_means=channel_means,
+      channel_stds=channel_stds,
+      bgr_ordering=bgr_ordering)
+  return CenterNetResnetV1FpnFeatureExtractor(
+      resnet_type='resnet_v1_101', **normalization_kwargs)
+def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
+  """Builds the CenterNet FPN feature extractor backed by ResNet v1 50.
+  Args:
+    channel_means: forwarded unchanged to the extractor constructor.
+    channel_stds: forwarded unchanged to the extractor constructor.
+    bgr_ordering: forwarded unchanged to the extractor constructor.
+  Returns:
+    A `CenterNetResnetV1FpnFeatureExtractor` built with resnet_type
+    'resnet_v1_50'.
+  """
+  normalization_kwargs = dict(
+      channel_means=channel_means,
+      channel_stds=channel_stds,
+      bgr_ordering=bgr_ordering)
+  return CenterNetResnetV1FpnFeatureExtractor(
+      resnet_type='resnet_v1_50', **normalization_kwargs)
--- a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py
+++ b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing ResNet v1 FPN models for the CenterNet meta architecture."""
+import unittest
+from absl.testing import parameterized
+import numpy as np
+import tensorflow.compat.v1 as tf
+from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,
+                                               parameterized.TestCase):
+  """Output-shape checks for the CenterNet ResNet v1 FPN feature extractors."""
+  @parameterized.parameters(
+      {'resnet_type': 'resnet_v1_50'},
+      {'resnet_type': 'resnet_v1_101'},
+  )
+  def test_correct_output_size(self, resnet_type):
+    """A batch of 224x224 images yields 56x56x64 features."""
+    fpn_module = center_net_resnet_v1_fpn_feature_extractor
+    extractor = fpn_module.CenterNetResnetV1FpnFeatureExtractor(resnet_type)
+    def graph_fn():
+      zeros = np.zeros((8, 224, 224, 3), dtype=np.float32)
+      return extractor(extractor.preprocess(zeros))
+    features = self.execute(graph_fn, [])
+    self.assertEqual(features.shape, (8, 56, 56, 64))
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
+++ b/research/object_detection/models/embedded_ssd_mobilenet_v1_feature_extractor_test.py
@@ -14,13 +14,16 @@
 # ==============================================================================
 """Tests for embedded_ssd_mobilenet_v1_feature_extractor."""
+import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf
 from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
 from object_detection.models import ssd_feature_extractor_test
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class EmbeddedSSDMobileNetV1FeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):

--- a/research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_inception_resnet_v2_feature_extractor_test.py
@@ -14,12 +14,14 @@
 # ==============================================================================
 """Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor."""
+import unittest
 import tensorflow.compat.v1 as tf
 from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):
  def _build_feature_extractor(self, first_stage_features_stride):

--- a/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_inception_resnet_v2_keras_feature_extractor_test.py
@@ -14,12 +14,14 @@
 # ==============================================================================
 """Tests for models.faster_rcnn_inception_resnet_v2_keras_feature_extractor."""
+import unittest
 import tensorflow.compat.v1 as tf
 from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
 class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
  def _build_feature_extractor(self, first_stage_features_stride):
@@ -38,11 +40,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
        name='TestScope')(preprocessed_inputs)
    features_shape = tf.shape(rpn_feature_map)
-    init_op = tf.global_variables_initializer()
+    self.assertAllEqual(features_shape.numpy(), [1, 19, 19, 1088])
-    with self.test_session() as sess:
-      sess.run(init_op)
-      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [1, 19, 19, 1088])
  def test_extract_proposal_features_stride_eight(self):
    feature_extractor = self._build_feature_extractor(
@@ -53,11 +51,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
        name='TestScope')(preprocessed_inputs)
    features_shape = tf.shape(rpn_feature_map)
-    init_op = tf.global_variables_initializer()
+    self.assertAllEqual(features_shape.numpy(), [1, 28, 28, 1088])
-    with self.test_session() as sess:
-      sess.run(init_op)
-      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [1, 28, 28, 1088])
  def test_extract_proposal_features_half_size_input(self):
    feature_extractor = self._build_feature_extractor(
@@ -67,25 +61,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
    rpn_feature_map = feature_extractor.get_proposal_feature_extractor_model(
        name='TestScope')(preprocessed_inputs)
    features_shape = tf.shape(rpn_feature_map)
+    self.assertAllEqual(features_shape.numpy(), [1, 7, 7, 1088])
-    init_op = tf.global_variables_initializer()
-    with self.test_session() as sess:
-      sess.run(init_op)
-      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [1, 7, 7, 1088])
-  def test_extract_proposal_features_dies_on_invalid_stride(self):
-    with self.assertRaises(ValueError):
-      self._build_feature_extractor(first_stage_features_stride=99)
-  def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
-    feature_extractor = self._build_feature_extractor(
-        first_stage_features_stride=16)
-    preprocessed_inputs = tf.random_uniform(
-        [224, 224, 3], maxval=255, dtype=tf.float32)
-    with self.assertRaises(ValueError):
-      feature_extractor.get_proposal_feature_extractor_model(
-          name='TestScope')(preprocessed_inputs)
  def test_extract_box_classifier_features_returns_expected_size(self):
    feature_extractor = self._build_feature_extractor(
@@ -97,12 +73,7 @@ class FasterRcnnInceptionResnetV2KerasFeatureExtractorTest(tf.test.TestCase):
    proposal_classifier_features = (
        model(proposal_feature_maps))
    features_shape = tf.shape(proposal_classifier_features)
+    self.assertAllEqual(features_shape.numpy(), [2, 8, 8, 1536])
-    init_op = tf.global_variables_initializer()
-    with self.test_session() as sess:
-      sess.run(init_op)
-      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [2, 8, 8, 1536])
 if __name__ == '__main__':

--- a/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor_test.py
@@ -14,13 +14,15 @@
 # ==============================================================================
 """Tests for faster_rcnn_inception_v2_feature_extractor."""
+import unittest
 import numpy as np
 import tensorflow.compat.v1 as tf
 from object_detection.models import faster_rcnn_inception_v2_feature_extractor as faster_rcnn_inception_v2
+from object_detection.utils import tf_version
+@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class FasterRcnnInceptionV2FeatureExtractorTest(tf.test.TestCase):
  def _build_feature_extractor(self, first_stage_features_stride):