Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
......@@ -18,11 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.metrics import calibration_metrics
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CalibrationLibTest(tf.test.TestCase):
@staticmethod
......
......@@ -24,6 +24,7 @@ import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields
from object_detection.metrics import coco_tools
from object_detection.utils import json_utils
from object_detection.utils import np_mask_ops
from object_detection.utils import object_detection_evaluation
......@@ -1263,3 +1264,535 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
eval_metric_ops[metric_name] = (tf.py_func(
value_func_factory(metric_name), [], np.float32), update_op)
return eval_metric_ops
class CocoPanopticSegmentationEvaluator(
object_detection_evaluation.DetectionEvaluator):
"""Class to evaluate PQ (panoptic quality) metric on COCO dataset.
More details about this metric: https://arxiv.org/pdf/1801.00868.pdf.
"""
def __init__(self,
categories,
include_metrics_per_category=False,
iou_threshold=0.5,
ioa_threshold=0.5):
"""Constructor.
Args:
categories: A list of dicts, each of which has the following keys -
'id': (required) an integer id uniquely identifying this category.
'name': (required) string representing category name e.g., 'cat', 'dog'.
include_metrics_per_category: If True, include metrics for each category.
iou_threshold: intersection-over-union threshold for mask matching (with
normal groundtruths).
ioa_threshold: intersection-over-area threshold for mask matching with
"is_crowd" groundtruths.
"""
super(CocoPanopticSegmentationEvaluator, self).__init__(categories)
self._groundtruth_masks = {}
self._groundtruth_class_labels = {}
self._groundtruth_is_crowd = {}
self._predicted_masks = {}
self._predicted_class_labels = {}
self._include_metrics_per_category = include_metrics_per_category
self._iou_threshold = iou_threshold
self._ioa_threshold = ioa_threshold
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
self._groundtruth_masks.clear()
self._groundtruth_class_labels.clear()
self._groundtruth_is_crowd.clear()
self._predicted_masks.clear()
self._predicted_class_labels.clear()
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
If the image has already been added, a warning is logged, and groundtruth is
ignored.
Args:
image_id: A unique string/integer identifier for the image.
groundtruth_dict: A dictionary containing -
InputDataFields.groundtruth_classes: integer numpy array of shape
[num_masks] containing 1-indexed groundtruth classes for the mask.
InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape
[num_masks, image_height, image_width] containing groundtruth masks.
The elements of the array must be in {0, 1}.
InputDataFields.groundtruth_is_crowd (optional): integer numpy array of
shape [num_boxes] containing iscrowd flag for groundtruth boxes.
"""
if image_id in self._groundtruth_masks:
tf.logging.warning(
'Ignoring groundtruth with image %s, since it has already been '
'added to the ground truth database.', image_id)
return
self._groundtruth_masks[image_id] = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks]
self._groundtruth_class_labels[image_id] = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_classes]
groundtruth_is_crowd = groundtruth_dict.get(
standard_fields.InputDataFields.groundtruth_is_crowd)
# Drop groundtruth_is_crowd if empty tensor.
if groundtruth_is_crowd is not None and groundtruth_is_crowd.size == 0:
groundtruth_is_crowd = None
if groundtruth_is_crowd is not None:
self._groundtruth_is_crowd[image_id] = groundtruth_is_crowd
def add_single_detected_image_info(self, image_id, detections_dict):
"""Adds detections for a single image to be used for evaluation.
If groundtruth for the image id has not been added yet, a ValueError is
raised; detections added again for the same image id overwrite the
previous ones.
Args:
image_id: A unique string/integer identifier for the image.
detections_dict: A dictionary containing -
DetectionResultFields.detection_classes: integer numpy array of shape
[num_masks] containing 1-indexed detection classes for the masks.
DetectionResultFields.detection_masks: optional uint8 numpy array of
shape [num_masks, image_height, image_width] containing instance
masks. The elements of the array must be in {0, 1}.
Raises:
ValueError: if groundtruth is missing for the image id, or if the shapes
of the detection and groundtruth masks don't match.
"""
if image_id not in self._groundtruth_masks:
raise ValueError('Missing groundtruth for image id: {}'.format(image_id))
detection_masks = detections_dict[
standard_fields.DetectionResultFields.detection_masks]
self._predicted_masks[image_id] = detection_masks
self._predicted_class_labels[image_id] = detections_dict[
standard_fields.DetectionResultFields.detection_classes]
groundtruth_mask_shape = self._groundtruth_masks[image_id].shape
if groundtruth_mask_shape[1:] != detection_masks.shape[1:]:
raise ValueError("The shape of results doesn't match groundtruth.")
def evaluate(self):
"""Evaluates the detection masks and returns a dictionary of coco metrics.
Returns:
A dictionary holding -
1. summary_metric:
'PanopticQuality@%.2fIOU': mean panoptic quality averaged over classes at
the required IOU.
'SegmentationQuality@%.2fIOU': mean segmentation quality averaged over
classes at the required IOU.
'RecognitionQuality@%.2fIOU': mean recognition quality averaged over
classes at the required IOU.
'NumValidClasses': number of valid classes. A valid class should have at
least one normal (is_crowd=0) groundtruth mask or one predicted mask.
'NumTotalClasses': number of total classes.
2. per_category_pq: if include_metrics_per_category is True, category
specific results with keys of the form:
'PanopticQuality@%.2fIOU_ByCategory/category'.
"""
# Evaluate and accumulate the iou/tp/fp/fn.
sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn = self._evaluate_all_masks()
# Compute PQ metric for each category and average over all classes.
mask_metrics = self._compute_panoptic_metrics(sum_tp_iou, sum_num_tp,
sum_num_fp, sum_num_fn)
return mask_metrics
def get_estimator_eval_metric_ops(self, eval_dict):
"""Returns a dictionary of eval metric ops.
Note that once value_op is called, the detections and groundtruth added via
update_op are cleared.
Args:
eval_dict: A dictionary that holds tensors for evaluating object detection
performance. For single-image evaluation, this dictionary may be
produced from eval_util.result_dict_for_single_example(). For multi-image
evaluation, `eval_dict` should contain the fields
'num_gt_masks_per_image' and 'num_det_masks_per_image' to properly unpad
the tensors from the batch.
Returns:
A dictionary of metric names to tuples of value_op and update_op that
can be used as eval metric ops in tf.estimator.EstimatorSpec. Note that
all update ops must be run together and similarly all value ops must be
run together to guarantee correct behavior.
"""
def update_op(image_id_batched, groundtruth_classes_batched,
groundtruth_instance_masks_batched,
groundtruth_is_crowd_batched, num_gt_masks_per_image,
detection_classes_batched, detection_masks_batched,
num_det_masks_per_image):
"""Update op for metrics."""
for (image_id, groundtruth_classes, groundtruth_instance_masks,
groundtruth_is_crowd, num_gt_mask, detection_classes,
detection_masks, num_det_mask) in zip(
image_id_batched, groundtruth_classes_batched,
groundtruth_instance_masks_batched, groundtruth_is_crowd_batched,
num_gt_masks_per_image, detection_classes_batched,
detection_masks_batched, num_det_masks_per_image):
self.add_single_ground_truth_image_info(
image_id, {
'groundtruth_classes':
groundtruth_classes[:num_gt_mask],
'groundtruth_instance_masks':
groundtruth_instance_masks[:num_gt_mask],
'groundtruth_is_crowd':
groundtruth_is_crowd[:num_gt_mask]
})
self.add_single_detected_image_info(
image_id, {
'detection_classes': detection_classes[:num_det_mask],
'detection_masks': detection_masks[:num_det_mask]
})
# Unpack items from the evaluation dictionary.
(image_id, groundtruth_classes, groundtruth_instance_masks,
groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
detection_masks, num_det_masks_per_image
) = self._unpack_evaluation_dictionary_items(eval_dict)
update_op = tf.py_func(update_op, [
image_id, groundtruth_classes, groundtruth_instance_masks,
groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
detection_masks, num_det_masks_per_image
], [])
metric_names = [
'PanopticQuality@%.2fIOU' % self._iou_threshold,
'SegmentationQuality@%.2fIOU' % self._iou_threshold,
'RecognitionQuality@%.2fIOU' % self._iou_threshold
]
if self._include_metrics_per_category:
for category_dict in self._categories:
metric_names.append('PanopticQuality@%.2fIOU_ByCategory/%s' %
(self._iou_threshold, category_dict['name']))
def first_value_func():
self._metrics = self.evaluate()
self.clear()
return np.float32(self._metrics[metric_names[0]])
def value_func_factory(metric_name):
def value_func():
return np.float32(self._metrics[metric_name])
return value_func
# Ensure that the metrics are only evaluated once.
first_value_op = tf.py_func(first_value_func, [], tf.float32)
eval_metric_ops = {metric_names[0]: (first_value_op, update_op)}
with tf.control_dependencies([first_value_op]):
for metric_name in metric_names[1:]:
eval_metric_ops[metric_name] = (tf.py_func(
value_func_factory(metric_name), [], np.float32), update_op)
return eval_metric_ops
def _evaluate_all_masks(self):
"""Evaluate all masks and compute sum iou/TP/FP/FN."""
sum_num_tp = {category['id']: 0 for category in self._categories}
sum_num_fp = sum_num_tp.copy()
sum_num_fn = sum_num_tp.copy()
sum_tp_iou = sum_num_tp.copy()
for image_id in self._groundtruth_class_labels:
# Separate normal and is_crowd groundtruth
crowd_gt_indices = self._groundtruth_is_crowd.get(image_id)
(normal_gt_masks, normal_gt_classes, crowd_gt_masks,
crowd_gt_classes) = self._separate_normal_and_crowd_labels(
crowd_gt_indices, self._groundtruth_masks[image_id],
self._groundtruth_class_labels[image_id])
# Mask matching to normal GT.
predicted_masks = self._predicted_masks[image_id]
predicted_class_labels = self._predicted_class_labels[image_id]
(overlaps, pred_matched,
gt_matched) = self._match_predictions_to_groundtruths(
predicted_masks,
predicted_class_labels,
normal_gt_masks,
normal_gt_classes,
self._iou_threshold,
is_crowd=False,
with_replacement=False)
# Accumulate true positives.
for (class_id, is_matched, overlap) in zip(predicted_class_labels,
pred_matched, overlaps):
if is_matched:
sum_num_tp[class_id] += 1
sum_tp_iou[class_id] += overlap
# Accumulate false negatives.
for (class_id, is_matched) in zip(normal_gt_classes, gt_matched):
if not is_matched:
sum_num_fn[class_id] += 1
# Match remaining predictions to crowd gt.
remained_pred_indices = np.logical_not(pred_matched)
remained_pred_masks = predicted_masks[remained_pred_indices, :, :]
remained_pred_classes = predicted_class_labels[remained_pred_indices]
_, pred_matched, _ = self._match_predictions_to_groundtruths(
remained_pred_masks,
remained_pred_classes,
crowd_gt_masks,
crowd_gt_classes,
self._ioa_threshold,
is_crowd=True,
with_replacement=True)
# Accumulate false positives
for (class_id, is_matched) in zip(remained_pred_classes, pred_matched):
if not is_matched:
sum_num_fp[class_id] += 1
return sum_tp_iou, sum_num_tp, sum_num_fp, sum_num_fn
def _compute_panoptic_metrics(self, sum_tp_iou, sum_num_tp, sum_num_fp,
sum_num_fn):
"""Compute PQ metric for each category and average over all classes.
Args:
sum_tp_iou: dict, summed true positive intersection-over-union (IoU) for
each class, keyed by class_id.
sum_num_tp: the total number of true positives for each class, keyed by
class_id.
sum_num_fp: the total number of false positives for each class, keyed by
class_id.
sum_num_fn: the total number of false negatives for each class, keyed by
class_id.
Returns:
mask_metrics: a dictionary containing averaged metrics over all classes,
and per-category metrics if required.
"""
mask_metrics = {}
sum_pq = 0
sum_sq = 0
sum_rq = 0
num_valid_classes = 0
for category in self._categories:
class_id = category['id']
(panoptic_quality, segmentation_quality,
recognition_quality) = self._compute_panoptic_metrics_single_class(
sum_tp_iou[class_id], sum_num_tp[class_id], sum_num_fp[class_id],
sum_num_fn[class_id])
if panoptic_quality is not None:
sum_pq += panoptic_quality
sum_sq += segmentation_quality
sum_rq += recognition_quality
num_valid_classes += 1
if self._include_metrics_per_category:
mask_metrics['PanopticQuality@%.2fIOU_ByCategory/%s' %
(self._iou_threshold,
category['name'])] = panoptic_quality
mask_metrics['PanopticQuality@%.2fIOU' %
self._iou_threshold] = sum_pq / num_valid_classes
mask_metrics['SegmentationQuality@%.2fIOU' %
self._iou_threshold] = sum_sq / num_valid_classes
mask_metrics['RecognitionQuality@%.2fIOU' %
self._iou_threshold] = sum_rq / num_valid_classes
mask_metrics['NumValidClasses'] = num_valid_classes
mask_metrics['NumTotalClasses'] = len(self._categories)
return mask_metrics
def _compute_panoptic_metrics_single_class(self, sum_tp_iou, num_tp, num_fp,
num_fn):
"""Compute panoptic metrics: panoptic/segmentation/recognition quality.
More computation details in https://arxiv.org/pdf/1801.00868.pdf.
Args:
sum_tp_iou: summed true positive intersection-over-union (IoU) for a
specific class.
num_tp: the total number of true positives for a specific class.
num_fp: the total number of false positives for a specific class.
num_fn: the total number of false negatives for a specific class.
Returns:
panoptic_quality: sum_tp_iou / (num_tp + 0.5*num_fp + 0.5*num_fn).
segmentation_quality: sum_tp_iou / num_tp.
recognition_quality: num_tp / (num_tp + 0.5*num_fp + 0.5*num_fn).
"""
denominator = num_tp + 0.5 * num_fp + 0.5 * num_fn
# Calculate metric only if there is at least one GT or one prediction.
if denominator > 0:
recognition_quality = num_tp / denominator
if num_tp > 0:
segmentation_quality = sum_tp_iou / num_tp
else:
# If there is no TP for this category.
segmentation_quality = 0
panoptic_quality = segmentation_quality * recognition_quality
return panoptic_quality, segmentation_quality, recognition_quality
else:
return None, None, None
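As a quick sanity check of the formulas in the docstring above, here is a hypothetical standalone sketch (not part of this module; the helper name is an assumption for illustration) that reproduces the arithmetic:

def panoptic_quality_example(sum_tp_iou, num_tp, num_fp, num_fn):
  # Mirrors the computation above: PQ = SQ * RQ = sum_tp_iou / denominator.
  denominator = num_tp + 0.5 * num_fp + 0.5 * num_fn
  if denominator == 0:
    return None, None, None
  rq = num_tp / denominator
  sq = sum_tp_iou / num_tp if num_tp else 0.0
  return sq * rq, sq, rq

# With 2 TPs of total IoU 1.6, 1 FP and 1 FN: SQ = 0.8, RQ = 2/3, PQ = 1.6/3.
pq, sq, rq = panoptic_quality_example(1.6, 2, 1, 1)
assert abs(pq - 1.6 / 3) < 1e-9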
def _separate_normal_and_crowd_labels(self, crowd_gt_indices,
groundtruth_masks, groundtruth_classes):
"""Separate normal and crowd groundtruth class_labels and masks.
Args:
crowd_gt_indices: None or array of shape [num_groundtruths]. If None, all
groundtruths are treated as normal ones.
groundtruth_masks: array of shape [num_groundtruths, height, width].
groundtruth_classes: array of shape [num_groundtruths].
Returns:
normal_gt_masks: array of shape [num_normal_groundtruths, height, width].
normal_gt_classes: array of shape [num_normal_groundtruths].
crowd_gt_masks: array of shape [num_crowd_groundtruths, height, width].
crowd_gt_classes: array of shape [num_crowd_groundtruths].
Raises:
ValueError: if the shape of groundtruth classes or of crowd_gt_indices
doesn't match the shape of groundtruth masks.
"""
if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
raise ValueError(
"The number of masks doesn't match the number of labels.")
if crowd_gt_indices is None:
# All gts are treated as normal
crowd_gt_indices = np.zeros(groundtruth_masks.shape[0], dtype=np.bool)
else:
if groundtruth_masks.shape[0] != crowd_gt_indices.shape[0]:
raise ValueError(
"The number of masks doesn't match the number of is_crowd labels.")
crowd_gt_indices = crowd_gt_indices.astype(np.bool)
normal_gt_indices = np.logical_not(crowd_gt_indices)
if normal_gt_indices.size:
normal_gt_masks = groundtruth_masks[normal_gt_indices, :, :]
normal_gt_classes = groundtruth_classes[normal_gt_indices]
crowd_gt_masks = groundtruth_masks[crowd_gt_indices, :, :]
crowd_gt_classes = groundtruth_classes[crowd_gt_indices]
else:
# No groundtruths available, groundtruth_masks.shape = (0, h, w)
normal_gt_masks = groundtruth_masks
normal_gt_classes = groundtruth_classes
crowd_gt_masks = groundtruth_masks
crowd_gt_classes = groundtruth_classes
return normal_gt_masks, normal_gt_classes, crowd_gt_masks, crowd_gt_classes
def _match_predictions_to_groundtruths(self,
predicted_masks,
predicted_classes,
groundtruth_masks,
groundtruth_classes,
matching_threshold,
is_crowd=False,
with_replacement=False):
"""Match the predicted masks to groundtruths.
Args:
predicted_masks: array of shape [num_predictions, height, width].
predicted_classes: array of shape [num_predictions].
groundtruth_masks: array of shape [num_groundtruths, height, width].
groundtruth_classes: array of shape [num_groundtruths].
matching_threshold: if the overlap between a prediction and a groundtruth
is at least this threshold, the prediction is a true positive.
is_crowd: whether the groundtruths are crowd annotation or not. If True,
use intersection over area (IoA) as the overlapping metric; otherwise
use intersection over union (IoU).
with_replacement: whether a groundtruth can be matched to multiple
predictions. By default, only 1-1 matching is allowed for normal
groundtruths; for crowd groundtruths, 1-to-many matching must be
allowed.
Returns:
best_overlaps: array of shape [num_predictions]. Values representing the
IoU or IoA with the best matched groundtruth.
pred_matched: array of shape [num_predictions]. Boolean value representing
whether the ith prediction is matched to a groundtruth.
gt_matched: array of shape [num_groundtruth]. Boolean value representing
whether the ith groundtruth is matched to a prediction.
Raises:
ValueError: if the shape of groundtruth/predicted masks doesn't match
groundtruth/predicted classes.
"""
if groundtruth_masks.shape[0] != groundtruth_classes.shape[0]:
raise ValueError(
"The number of GT masks doesn't match the number of labels.")
if predicted_masks.shape[0] != predicted_classes.shape[0]:
raise ValueError(
"The number of predicted masks doesn't match the number of labels.")
gt_matched = np.zeros(groundtruth_classes.shape, dtype=np.bool)
pred_matched = np.zeros(predicted_classes.shape, dtype=np.bool)
best_overlaps = np.zeros(predicted_classes.shape)
for pid in range(predicted_classes.shape[0]):
best_overlap = 0
matched_gt_id = -1
for gid in range(groundtruth_classes.shape[0]):
if predicted_classes[pid] == groundtruth_classes[gid]:
if (not with_replacement) and gt_matched[gid]:
continue
if not is_crowd:
overlap = np_mask_ops.iou(predicted_masks[pid:pid + 1],
groundtruth_masks[gid:gid + 1])[0, 0]
else:
overlap = np_mask_ops.ioa(groundtruth_masks[gid:gid + 1],
predicted_masks[pid:pid + 1])[0, 0]
if overlap >= matching_threshold and overlap > best_overlap:
matched_gt_id = gid
best_overlap = overlap
if matched_gt_id >= 0:
gt_matched[matched_gt_id] = True
pred_matched[pid] = True
best_overlaps[pid] = best_overlap
return best_overlaps, pred_matched, gt_matched
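A hedged toy illustration of the two overlap measures used in the loop above (assuming, as in the calls above, that np_mask_ops.ioa divides the intersection by the area of its second argument):

import numpy as np
from object_detection.utils import np_mask_ops

gt = np.zeros((1, 4, 4), dtype=np.uint8)
pred = np.zeros((1, 4, 4), dtype=np.uint8)
gt[0, 0:2, 0:4] = 1    # 8-pixel groundtruth mask
pred[0, 0:2, 0:2] = 1  # 4-pixel prediction, fully inside the groundtruth
print(np_mask_ops.iou(pred, gt)[0, 0])  # intersection/union = 4/8 = 0.5
print(np_mask_ops.ioa(gt, pred)[0, 0])  # intersection/pred area = 4/4 = 1.0

This is why crowd matching with IoA is more permissive: a prediction contained in a crowd region counts as matched even when its IoU with the crowd mask is small.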
def _unpack_evaluation_dictionary_items(self, eval_dict):
"""Unpack items from the evaluation dictionary."""
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
image_id = eval_dict[input_data_fields.key]
groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
groundtruth_instance_masks = eval_dict[
input_data_fields.groundtruth_instance_masks]
groundtruth_is_crowd = eval_dict.get(input_data_fields.groundtruth_is_crowd,
None)
num_gt_masks_per_image = eval_dict.get(
input_data_fields.num_groundtruth_boxes, None)
detection_classes = eval_dict[detection_fields.detection_classes]
detection_masks = eval_dict[detection_fields.detection_masks]
num_det_masks_per_image = eval_dict.get(detection_fields.num_detections,
None)
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
if not image_id.shape.as_list():
# Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
detection_classes = tf.expand_dims(detection_classes, 0)
detection_masks = tf.expand_dims(detection_masks, 0)
if num_gt_masks_per_image is None:
num_gt_masks_per_image = tf.shape(groundtruth_classes)[1:2]
else:
num_gt_masks_per_image = tf.expand_dims(num_gt_masks_per_image, 0)
if num_det_masks_per_image is None:
num_det_masks_per_image = tf.shape(detection_classes)[1:2]
else:
num_det_masks_per_image = tf.expand_dims(num_det_masks_per_image, 0)
else:
if num_gt_masks_per_image is None:
num_gt_masks_per_image = tf.tile(
tf.shape(groundtruth_classes)[1:2],
multiples=tf.shape(groundtruth_classes)[0:1])
if num_det_masks_per_image is None:
num_det_masks_per_image = tf.tile(
tf.shape(detection_classes)[1:2],
multiples=tf.shape(detection_classes)[0:1])
return (image_id, groundtruth_classes, groundtruth_instance_masks,
groundtruth_is_crowd, num_gt_masks_per_image, detection_classes,
detection_masks, num_det_masks_per_image)
......@@ -18,10 +18,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import tf_version
def _get_categories_list():
......@@ -250,6 +252,7 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
})
@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
......@@ -926,6 +929,7 @@ class CocoKeypointEvaluationTest(tf.test.TestCase):
-1.0)
@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoKeypointEvaluationPyFuncTest(tf.test.TestCase):
def testGetOneMAPWithMatchingKeypoints(self):
......@@ -1438,6 +1442,7 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_masks_list)
@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
def testAddEvalDict(self):
......@@ -1716,5 +1721,221 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_masks_list)
def _get_panoptic_test_data():
# image1 contains 3 people in gt (2 normal annotations and 1 "is_crowd"
# annotation), and 3 people in prediction.
gt_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
result_masks1 = np.zeros((3, 50, 50), dtype=np.uint8)
gt_masks1[0, 10:20, 20:30] = 1
result_masks1[0, 10:18, 20:30] = 1
gt_masks1[1, 25:30, 25:35] = 1
result_masks1[1, 18:25, 25:30] = 1
gt_masks1[2, 40:50, 40:50] = 1
result_masks1[2, 47:50, 47:50] = 1
gt_class1 = np.array([1, 1, 1])
gt_is_crowd1 = np.array([0, 0, 1])
result_class1 = np.array([1, 1, 1])
# image2 contains 1 dog and 1 cat in gt, while the prediction contains
# 1 person and 1 dog.
gt_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
result_masks2 = np.zeros((2, 30, 40), dtype=np.uint8)
gt_masks2[0, 5:15, 20:35] = 1
gt_masks2[1, 20:30, 0:10] = 1
result_masks2[0, 20:25, 10:15] = 1
result_masks2[1, 6:15, 15:35] = 1
gt_class2 = np.array([2, 3])
gt_is_crowd2 = np.array([0, 0])
result_class2 = np.array([1, 2])
gt_class = [gt_class1, gt_class2]
gt_masks = [gt_masks1, gt_masks2]
gt_is_crowd = [gt_is_crowd1, gt_is_crowd2]
result_class = [result_class1, result_class2]
result_masks = [result_masks1, result_masks2]
return gt_class, gt_masks, gt_is_crowd, result_class, result_masks
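For readers verifying the assertions below, a derivation of the person-class expectation (a reading aid, not part of the test):

# Person (class 1): in image1 the first prediction matches the first
# groundtruth with IoU 80/100 = 0.8; the third prediction falls entirely
# inside the crowd mask (IoA = 1.0) and is absorbed rather than counted;
# the second prediction matches nothing, and image2 adds one more spurious
# person. Hence num_tp=1, sum_tp_iou=0.8, num_fp=2, num_fn=1, and
# PQ_person = 0.8 / (1 + 0.5 * 2 + 0.5 * 1) = 0.8 / 2.5 = 0.32.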
class CocoPanopticEvaluationTest(tf.test.TestCase):
def test_panoptic_quality(self):
pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
_get_categories_list(), include_metrics_per_category=True)
(gt_class, gt_masks, gt_is_crowd, result_class,
result_masks) = _get_panoptic_test_data()
for i in range(2):
pq_evaluator.add_single_ground_truth_image_info(
image_id='image%d' % i,
groundtruth_dict={
standard_fields.InputDataFields.groundtruth_classes:
gt_class[i],
standard_fields.InputDataFields.groundtruth_instance_masks:
gt_masks[i],
standard_fields.InputDataFields.groundtruth_is_crowd:
gt_is_crowd[i]
})
pq_evaluator.add_single_detected_image_info(
image_id='image%d' % i,
detections_dict={
standard_fields.DetectionResultFields.detection_classes:
result_class[i],
standard_fields.DetectionResultFields.detection_masks:
result_masks[i]
})
metrics = pq_evaluator.evaluate()
self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/person'],
0.32)
self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/dog'],
135.0 / 195)
self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU_ByCategory/cat'], 0)
self.assertAlmostEqual(metrics['SegmentationQuality@0.50IOU'],
(0.8 + 135.0 / 195) / 3)
self.assertAlmostEqual(metrics['RecognitionQuality@0.50IOU'], (0.4 + 1) / 3)
self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
(0.32 + 135.0 / 195) / 3)
self.assertEqual(metrics['NumValidClasses'], 3)
self.assertEqual(metrics['NumTotalClasses'], 3)
@unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X')
class CocoPanopticEvaluationPyFuncTest(tf.test.TestCase):
def testPanopticQualityNoBatch(self):
pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
_get_categories_list(), include_metrics_per_category=True)
image_id = tf.placeholder(tf.string, shape=())
groundtruth_classes = tf.placeholder(tf.int32, shape=(None))
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(None))
detection_classes = tf.placeholder(tf.int32, shape=(None))
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
}
eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['PanopticQuality@0.50IOU']
(gt_class, gt_masks, gt_is_crowd, result_class,
result_masks) = _get_panoptic_test_data()
with self.test_session() as sess:
for i in range(2):
sess.run(
update_op,
feed_dict={
image_id: 'image%d' % i,
groundtruth_classes: gt_class[i],
groundtruth_masks: gt_masks[i],
groundtruth_is_crowd: gt_is_crowd[i],
detection_classes: result_class[i],
detection_masks: result_masks[i]
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
(0.32 + 135.0 / 195) / 3)
def testPanopticQualityBatched(self):
pq_evaluator = coco_evaluation.CocoPanopticSegmentationEvaluator(
_get_categories_list(), include_metrics_per_category=True)
batch_size = 2
image_id = tf.placeholder(tf.string, shape=(batch_size))
groundtruth_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
groundtruth_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
groundtruth_is_crowd = tf.placeholder(tf.int32, shape=(batch_size, None))
detection_classes = tf.placeholder(tf.int32, shape=(batch_size, None))
detection_masks = tf.placeholder(
tf.uint8, shape=(batch_size, None, None, None))
num_gt_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
num_det_masks_per_image = tf.placeholder(tf.int32, shape=(batch_size))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key: image_id,
input_data_fields.groundtruth_classes: groundtruth_classes,
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
input_data_fields.groundtruth_is_crowd: groundtruth_is_crowd,
input_data_fields.num_groundtruth_boxes: num_gt_masks_per_image,
detection_fields.detection_classes: detection_classes,
detection_fields.detection_masks: detection_masks,
detection_fields.num_detections: num_det_masks_per_image,
}
eval_metric_ops = pq_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['PanopticQuality@0.50IOU']
(gt_class, gt_masks, gt_is_crowd, result_class,
result_masks) = _get_panoptic_test_data()
with self.test_session() as sess:
sess.run(
update_op,
feed_dict={
image_id: ['image0', 'image1'],
groundtruth_classes:
np.stack([
gt_class[0],
np.pad(gt_class[1], (0, 1), mode='constant')
],
axis=0),
groundtruth_masks:
np.stack([
np.pad(
gt_masks[0], ((0, 0), (0, 10), (0, 10)),
mode='constant'),
np.pad(
gt_masks[1], ((0, 1), (0, 30), (0, 20)),
mode='constant'),
],
axis=0),
groundtruth_is_crowd:
np.stack([
gt_is_crowd[0],
np.pad(gt_is_crowd[1], (0, 1), mode='constant')
],
axis=0),
num_gt_masks_per_image: np.array([3, 2]),
detection_classes:
np.stack([
result_class[0],
np.pad(result_class[1], (0, 1), mode='constant')
],
axis=0),
detection_masks:
np.stack([
np.pad(
result_masks[0], ((0, 0), (0, 10), (0, 10)),
mode='constant'),
np.pad(
result_masks[1], ((0, 1), (0, 30), (0, 20)),
mode='constant'),
],
axis=0),
num_det_masks_per_image: np.array([3, 2]),
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['PanopticQuality@0.50IOU'],
(0.32 + 135.0 / 195) / 3)
if __name__ == '__main__':
tf.test.main()
......@@ -52,6 +52,7 @@ from pycocotools import coco
from pycocotools import cocoeval
from pycocotools import mask
import six
from six.moves import range
from six.moves import zip
import tensorflow.compat.v1 as tf
......@@ -353,7 +354,9 @@ def _RleCompress(masks):
Returns:
A pycocotools Run-length encoding of the mask.
"""
return mask.encode(np.asfortranarray(masks))
rle = mask.encode(np.asfortranarray(masks))
rle['counts'] = six.ensure_str(rle['counts'])
return rle
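The ensure_str conversion matters because pycocotools returns the RLE 'counts' field as bytes under Python 3, which the json module cannot serialize. A hypothetical standalone sketch:

import json
import numpy as np
import six
from pycocotools import mask

rle = mask.encode(np.asfortranarray(np.ones((4, 4), dtype=np.uint8)))
rle['counts'] = six.ensure_str(rle['counts'])  # bytes -> str on Python 3
print(json.dumps(rle))  # now serializable, e.g. into a COCO results file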
def ExportSingleImageGroundtruthToCoco(image_id,
......
......@@ -36,8 +36,8 @@ import os
import re
import tensorflow.compat.v1 as tf
from object_detection import eval_util
from object_detection.core import standard_fields
from object_detection.legacy import evaluator
from object_detection.metrics import tf_example_parser
from object_detection.utils import config_util
from object_detection.utils import label_map_util
......@@ -94,7 +94,7 @@ def read_data_and_evaluate(input_config, eval_config):
categories = label_map_util.create_categories_from_labelmap(
input_config.label_map_path)
object_detection_evaluators = evaluator.get_evaluators(
object_detection_evaluators = eval_util.get_evaluators(
eval_config, categories)
# Support a single evaluator
object_detection_evaluator = object_detection_evaluators[0]
......
......@@ -23,9 +23,9 @@ import functools
import os
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2
import tf_slim as slim
from object_detection import eval_util
from object_detection import exporter as exporter_lib
from object_detection import inputs
......@@ -349,7 +349,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top
# Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
base_layer_utils.enable_v2_dtype_behavior()
tf.compat.v2.keras.mixed_precision.experimental.set_policy(
tf2.keras.mixed_precision.experimental.set_policy(
'mixed_bfloat16')
detection_model = detection_model_fn(
is_training=is_training, add_summaries=(not use_tpu))
......
......@@ -20,19 +20,17 @@ from __future__ import print_function
import functools
import os
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from object_detection import inputs
from object_detection import model_hparams
from object_detection import model_lib
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
from object_detection.utils import tf_version
# Model for test. Options are:
......@@ -122,6 +120,7 @@ def _make_initializable_iterator(dataset):
return iterator
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ModelLibTest(tf.test.TestCase):
@classmethod
......@@ -337,8 +336,7 @@ class ModelLibTest(tf.test.TestCase):
def test_create_tpu_estimator_and_inputs(self):
"""Tests that number of train/eval defaults to config values."""
run_config = tpu_config.RunConfig()
run_config = tf.estimator.tpu.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
......@@ -352,7 +350,7 @@ class ModelLibTest(tf.test.TestCase):
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
self.assertIsInstance(estimator, tf.estimator.tpu.TPUEstimator)
self.assertEqual(20, train_steps)
def test_create_train_and_eval_specs(self):
......@@ -406,6 +404,7 @@ class ModelLibTest(tf.test.TestCase):
self.assertEqual(None, experiment.eval_steps)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class UnbatchTensorsTest(tf.test.TestCase):
def test_unbatch_without_unpadding(self):
......
......@@ -20,18 +20,19 @@ from __future__ import print_function
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2
from object_detection import inputs
from object_detection import model_hparams
from object_detection import model_lib_v2
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import tf_version
if six.PY2:
import mock # pylint: disable=g-importing-member,g-import-not-at-top
......@@ -72,6 +73,7 @@ def _get_config_kwarg_overrides():
}
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ModelLibTest(tf.test.TestCase):
@classmethod
......@@ -80,24 +82,25 @@ class ModelLibTest(tf.test.TestCase):
def test_train_loop_then_eval_loop(self):
"""Tests that Estimator and input function are constructed correctly."""
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
model_dir = tf.test.get_temp_dir()
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
new_pipeline_config_path = os.path.join(model_dir, 'new_pipeline.config')
config_util.clear_fine_tune_checkpoint(pipeline_config_path,
new_pipeline_config_path)
config_kwarg_overrides = _get_config_kwarg_overrides()
model_dir = tf.test.get_temp_dir()
train_steps = 2
model_lib_v2.train_loop(
hparams,
pipeline_config_path,
model_dir=model_dir,
train_steps=train_steps,
checkpoint_every_n=1,
**config_kwarg_overrides)
strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0')
with strategy.scope():
model_lib_v2.train_loop(
new_pipeline_config_path,
model_dir=model_dir,
train_steps=train_steps,
checkpoint_every_n=1,
**config_kwarg_overrides)
model_lib_v2.eval_continuously(
hparams,
pipeline_config_path,
new_pipeline_config_path,
model_dir=model_dir,
checkpoint_dir=model_dir,
train_steps=train_steps,
......@@ -120,6 +123,9 @@ class SimpleModel(model.DetectionModel):
return []
def restore_map(self, *args, **kwargs):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
return {'model': self}
def preprocess(self, _):
......@@ -139,27 +145,31 @@ class SimpleModel(model.DetectionModel):
return []
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ModelCheckpointTest(tf.test.TestCase):
"""Test for model checkpoint related functionality."""
def test_checkpoint_max_to_keep(self):
"""Test that only the most recent checkpoints are kept."""
strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = SimpleModel()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
with strategy.scope():
mock_builder.return_value = SimpleModel()
model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
new_pipeline_config_path = os.path.join(model_dir, 'new_pipeline.config')
config_util.clear_fine_tune_checkpoint(pipeline_config_path,
new_pipeline_config_path)
config_kwarg_overrides = _get_config_kwarg_overrides()
model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
model_lib_v2.train_loop(
hparams, pipeline_config_path, model_dir=model_dir,
train_steps=20, checkpoint_every_n=2, checkpoint_max_to_keep=3,
**config_kwarg_overrides
)
with strategy.scope():
model_lib_v2.train_loop(
new_pipeline_config_path, model_dir=model_dir,
train_steps=20, checkpoint_every_n=2, checkpoint_max_to_keep=3,
**config_kwarg_overrides
)
ckpt_files = tf.io.gfile.glob(os.path.join(model_dir, 'ckpt-*.index'))
self.assertEqual(len(ckpt_files), 3,
'{} not of length 3.'.format(ckpt_files))
......@@ -167,10 +177,11 @@ class ModelCheckpointTest(tf.test.TestCase):
class IncompatibleModel(SimpleModel):
def restore_map(self, *args, **kwargs):
def restore_from_objects(self, *args, **kwargs):
return {'weight': self.weight}
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CheckpointV2Test(tf.test.TestCase):
def setUp(self):
......@@ -199,7 +210,6 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
self._model, self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
np.testing.assert_allclose(self._model.weight.numpy(), 42)
......@@ -212,8 +222,9 @@ class CheckpointV2Test(tf.test.TestCase):
model_lib_v2.load_fine_tune_checkpoint(
IncompatibleModel(), self._ckpt_path, checkpoint_type='',
checkpoint_version=train_pb2.CheckpointVersion.V2,
load_all_detection_checkpoint_vars=True,
input_dataset=self._train_input_fn(),
unpad_groundtruth_tensors=True)
if __name__ == '__main__':
tf.test.main()
......@@ -34,7 +34,6 @@ from object_detection.protos import train_pb2
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vutils
# pylint: disable=g-import-not-at-top
......@@ -47,13 +46,6 @@ except ImportError:
MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP
### NOTE: This file is a wip.
### TODO(kaftan): Explore adding unit tests for individual methods
### TODO(kaftan): Add unit test that checks training on a single image w/
#### groundtruth, and verify that loss goes to zero.
#### Possibly have version that takes it as the whole train & eval dataset,
#### & verify the loss output from the eval_loop method.
### TODO(kaftan): Make sure the unit tests run in TAP presubmits or Kokoro
RESTORE_MAP_ERROR_TEMPLATE = (
'Since we are restoring a v2 style checkpoint'
......@@ -277,14 +269,21 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map):
"""
for key, value in checkpoint_restore_map.items():
if not (isinstance(key, str) and isinstance(value, tf.Module)):
if not (isinstance(key, str) and
isinstance(value, (tf.Module, tf.train.Checkpoint))):
raise TypeError(RESTORE_MAP_ERROR_TEMPLATE.format(
key.__class__.__name__, value.__class__.__name__))
def is_object_based_checkpoint(checkpoint_path):
"""Returns true if `checkpoint_path` points to an object-based checkpoint."""
var_names = [var[0] for var in tf.train.list_variables(checkpoint_path)]
return '_CHECKPOINTABLE_OBJECT_GRAPH' in var_names
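Object-based (TF2) checkpoints record their object graph under the reserved `_CHECKPOINTABLE_OBJECT_GRAPH` variable, which is what the helper above looks for. A hypothetical usage sketch (the checkpoint path is an assumption):

import tensorflow.compat.v1 as tf

ckpt_path = '/tmp/model_dir/ckpt-1'  # hypothetical checkpoint prefix
if is_object_based_checkpoint(ckpt_path):
  print('object-based (V2) checkpoint; restorable via tf.train.Checkpoint')
else:
  print('name-based (V1) checkpoint; unsupported by this binary')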
def load_fine_tune_checkpoint(
model, checkpoint_path, checkpoint_type, checkpoint_version,
load_all_detection_checkpoint_vars, input_dataset,
model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset,
unpad_groundtruth_tensors):
"""Load a fine tuning classification or detection checkpoint.
......@@ -292,8 +291,7 @@ def load_fine_tune_checkpoint(
the model by computing a dummy loss. (Models might not have built their
variables before their first execution)
It then loads a variable-name based classification or detection checkpoint
that comes from converted TF 1.x slim model checkpoints.
It then loads an object-based classification or detection checkpoint.
This method updates the model in-place and does not return a value.
......@@ -306,14 +304,22 @@ def load_fine_tune_checkpoint(
classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`.
checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating
whether to load checkpoints in V1 style or V2 style.
load_all_detection_checkpoint_vars: whether to load all variables (when
`fine_tune_checkpoint_type` is `detection`). If False, only variables
within the feature extractor scopes are included. Default False.
whether to load checkpoints in V1 style or V2 style. In this binary
we only support V2 style (object-based) checkpoints.
input_dataset: The tf.data Dataset the model is being trained on. Needed
to get the shapes for the dummy loss computation.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
Raises:
IOError: if `checkpoint_path` does not point at a valid object-based
checkpoint
ValueError: if `checkpoint_version` is not train_pb2.CheckpointVersion.V2
"""
if not is_object_based_checkpoint(checkpoint_path):
raise IOError('Checkpoint is expected to be an object-based checkpoint.')
if checkpoint_version == train_pb2.CheckpointVersion.V1:
raise ValueError('Checkpoint version should be V2')
features, labels = iter(input_dataset).next()
@tf.function
......@@ -330,35 +336,20 @@ def load_fine_tune_checkpoint(
labels)
strategy = tf.compat.v2.distribute.get_strategy()
strategy.run(
strategy.experimental_run_v2(
_dummy_computation_fn, args=(
features,
labels,
))
if checkpoint_version == train_pb2.CheckpointVersion.V1:
var_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
available_var_map = variables_helper.get_variables_available_in_checkpoint(
var_map,
checkpoint_path,
include_global_step=False)
tf.train.init_from_checkpoint(checkpoint_path,
available_var_map)
elif checkpoint_version == train_pb2.CheckpointVersion.V2:
restore_map = model.restore_map(
fine_tune_checkpoint_type=checkpoint_type,
load_all_detection_checkpoint_vars=(
load_all_detection_checkpoint_vars))
validate_tf_v2_checkpoint_restore_map(restore_map)
ckpt = tf.train.Checkpoint(**restore_map)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
def _get_filepath(strategy, filepath):
restore_from_objects_dict = model.restore_from_objects(
fine_tune_checkpoint_type=checkpoint_type)
validate_tf_v2_checkpoint_restore_map(restore_from_objects_dict)
ckpt = tf.train.Checkpoint(**restore_from_objects_dict)
ckpt.restore(checkpoint_path).assert_existing_objects_matched()
def get_filepath(strategy, filepath):
"""Get appropriate filepath for worker.
Args:
......@@ -377,7 +368,7 @@ def _get_filepath(strategy, filepath):
return os.path.join(filepath, 'temp_worker_{:03d}'.format(task_id))
def _clean_temporary_directories(strategy, filepath):
def clean_temporary_directories(strategy, filepath):
"""Temporary directory clean up for MultiWorker Mirrored Strategy.
This is needed for all non-chief workers.
......@@ -392,14 +383,12 @@ def _clean_temporary_directories(strategy, filepath):
def train_loop(
hparams,
pipeline_config_path,
model_dir,
config_override=None,
train_steps=None,
use_tpu=False,
save_final_config=False,
export_to_tpu=None,
checkpoint_every_n=1000,
checkpoint_max_to_keep=7,
**kwargs):
......@@ -417,7 +406,6 @@ def train_loop(
8. Logs the training metrics as TensorBoard summaries.
Args:
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
model_dir:
The directory to save checkpoints and summaries to.
......@@ -428,10 +416,6 @@ def train_loop(
use_tpu: Boolean, whether training and evaluation should run on TPU.
save_final_config: Whether to save final config (obtained after applying
overrides) to `model_dir`.
export_to_tpu: When use_tpu and export_to_tpu are true,
`export_savedmodel()` exports a metagraph for serving on TPU besides the
one on CPU. If export_to_tpu is not provided, we will look for it in
hparams too.
checkpoint_every_n:
Checkpoint every n training steps.
checkpoint_max_to_keep:
......@@ -453,7 +437,7 @@ def train_loop(
'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
})
configs = merge_external_params_with_configs(
configs, hparams, kwargs_dict=kwargs)
configs, None, kwargs_dict=kwargs)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
......@@ -468,33 +452,14 @@ def train_loop(
if train_steps is None and train_config.num_steps != 0:
train_steps = train_config.num_steps
# Read export_to_tpu from hparams if not passed.
if export_to_tpu is None:
export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info(
'train_loop: use_tpu %s, export_to_tpu %s', use_tpu,
export_to_tpu)
if kwargs['use_bfloat16']:
tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
# Parse the checkpoint fine tuning configs
if hparams.load_pretrained:
fine_tune_checkpoint_path = train_config.fine_tune_checkpoint
else:
fine_tune_checkpoint_path = None
load_all_detection_checkpoint_vars = (
train_config.load_all_detection_checkpoint_vars)
# TODO(kaftan) (or anyone else): move this piece of config munging to
## utils/config_util.py
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, set train_config.fine_tune_checkpoint_type
# based on train_config.from_detection_checkpoint.
if train_config.from_detection_checkpoint:
train_config.fine_tune_checkpoint_type = 'detection'
else:
train_config.fine_tune_checkpoint_type = 'classification'
if train_config.load_all_detection_checkpoint_vars:
raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
'unsupported in TF2')
config_util.update_fine_tune_checkpoint_type(train_config)
fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version
......@@ -539,8 +504,8 @@ def train_loop(
## Train the model
# Get the appropriate filepath (temporary or not) based on whether the worker
# is the chief.
summary_writer_filepath = _get_filepath(strategy,
os.path.join(model_dir, 'train'))
summary_writer_filepath = get_filepath(strategy,
os.path.join(model_dir, 'train'))
summary_writer = tf.compat.v2.summary.create_file_writer(
summary_writer_filepath)
......@@ -556,18 +521,18 @@ def train_loop(
with tf.compat.v2.summary.record_if(
lambda: global_step % num_steps_per_iteration == 0):
# Load a fine-tuning checkpoint.
if fine_tune_checkpoint_path:
load_fine_tune_checkpoint(detection_model, fine_tune_checkpoint_path,
if train_config.fine_tune_checkpoint:
load_fine_tune_checkpoint(detection_model,
train_config.fine_tune_checkpoint,
fine_tune_checkpoint_type,
fine_tune_checkpoint_version,
load_all_detection_checkpoint_vars,
train_input,
unpad_groundtruth_tensors)
ckpt = tf.compat.v2.train.Checkpoint(
step=global_step, model=detection_model, optimizer=optimizer)
manager_dir = _get_filepath(strategy, model_dir)
manager_dir = get_filepath(strategy, model_dir)
if not strategy.extended.should_checkpoint:
checkpoint_max_to_keep = 1
manager = tf.compat.v2.train.CheckpointManager(
......@@ -597,7 +562,7 @@ def train_loop(
def _sample_and_train(strategy, train_step_fn, data_iterator):
features, labels = data_iterator.next()
per_replica_losses = strategy.run(
per_replica_losses = strategy.experimental_run_v2(
train_step_fn, args=(features, labels))
# TODO(anjalisridhar): explore if it is safe to remove the
## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
......@@ -615,6 +580,10 @@ def train_loop(
return _sample_and_train(strategy, train_step_fn, data_iterator)
train_input_iter = iter(train_input)
if int(global_step.value()) == 0:
manager.save()
checkpointed_step = int(global_step.value())
logged_step = global_step.value()
......@@ -646,8 +615,8 @@ def train_loop(
# Remove the checkpoint directories of the non-chief workers that
# MultiWorkerMirroredStrategy forces us to save during sync distributed
# training.
_clean_temporary_directories(strategy, manager_dir)
_clean_temporary_directories(strategy, summary_writer_filepath)
clean_temporary_directories(strategy, manager_dir)
clean_temporary_directories(strategy, summary_writer_filepath)
def eager_eval_loop(
......@@ -767,28 +736,25 @@ def eager_eval_loop(
return eval_dict, losses_dict, class_agnostic
agnostic_categories = label_map_util.create_class_agnostic_category_index()
per_class_categories = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
keypoint_edges = [
(kp.start, kp.end) for kp in eval_config.keypoint_edge]
for i, (features, labels) in enumerate(eval_dataset):
eval_dict, losses_dict, class_agnostic = compute_eval_dict(features, labels)
if class_agnostic:
category_index = agnostic_categories
else:
category_index = per_class_categories
if i % 100 == 0:
tf.logging.info('Finished eval step %d', i)
use_original_images = fields.InputDataFields.original_image in features
if not use_tpu and use_original_images:
# Summary for input images.
tf.compat.v2.summary.image(
name='eval_input_images',
step=global_step,
data=eval_dict['original_image'],
max_outputs=1)
# Summary for prediction/groundtruth side-by-side images.
if class_agnostic:
category_index = label_map_util.create_class_agnostic_category_index()
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
keypoint_edges = [
(kp.start, kp.end) for kp in eval_config.keypoint_edge]
if use_original_images and i < eval_config.num_visualizations:
sbys_image_list = vutils.draw_side_by_side_evaluation_image(
eval_dict,
category_index=category_index,
......@@ -798,10 +764,10 @@ def eager_eval_loop(
keypoint_edges=keypoint_edges or None)
sbys_images = tf.concat(sbys_image_list, axis=0)
tf.compat.v2.summary.image(
name='eval_side_by_side',
name='eval_side_by_side_' + str(i),
step=global_step,
data=sbys_images,
max_outputs=eval_config.num_visualizations)
max_outputs=1)
if evaluators is None:
if class_agnostic:
......@@ -830,14 +796,15 @@ def eager_eval_loop(
eval_metrics[loss_key] = loss_metrics[loss_key].result()
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
tf.logging.info('Eval metrics at step %d', global_step)
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
def eval_continuously(
hparams,
pipeline_config_path,
config_override=None,
train_steps=None,
......@@ -846,7 +813,6 @@ def eval_continuously(
use_tpu=False,
override_eval_num_epochs=True,
postprocess_on_cpu=False,
export_to_tpu=None,
model_dir=None,
checkpoint_dir=None,
wait_interval=180,
......@@ -859,7 +825,6 @@ def eval_continuously(
on the evaluation data.
Args:
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
......@@ -875,10 +840,6 @@ def eval_continuously(
eval_input.
postprocess_on_cpu: When use_tpu and postprocess_on_cpu are true,
postprocess is scheduled on the host cpu.
export_to_tpu: When use_tpu and export_to_tpu are true,
`export_savedmodel()` exports a metagraph for serving on TPU besides the
one on CPU. If export_to_tpu is not provided, we will look for it in
hparams too.
model_dir: Directory to output resulting evaluation summaries to.
checkpoint_dir: Directory that contains the training checkpoints.
wait_interval: The minimum number of seconds to wait before checking for a
......@@ -906,7 +867,7 @@ def eval_continuously(
tf.logging.warning(
'Forced number of epochs for all eval validations to be 1.')
configs = merge_external_params_with_configs(
configs, hparams, kwargs_dict=kwargs)
configs, None, kwargs_dict=kwargs)
model_config = configs['model']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
......@@ -938,12 +899,6 @@ def eval_continuously(
model=detection_model)
eval_inputs.append((eval_input_config.name, next_eval_input))
# Read export_to_tpu from hparams if not passed.
if export_to_tpu is None:
export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('eval_continuously: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu)
global_step = tf.compat.v2.Variable(
0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
......@@ -956,7 +911,7 @@ def eval_continuously(
for eval_name, eval_input in eval_inputs:
summary_writer = tf.compat.v2.summary.create_file_writer(
model_dir + '/eval' + eval_name)
os.path.join(model_dir, 'eval', eval_name))
with summary_writer.as_default():
eager_eval_loop(
detection_model,
......
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs TF2 object detection models.
For local training/evaluation run:
PIPELINE_CONFIG_PATH=path/to/pipeline.config
MODEL_DIR=/tmp/model_outputs
NUM_TRAIN_STEPS=10000
SAMPLE_1_OF_N_EVAL_EXAMPLES=1
python model_main_tf2.py -- \
--model_dir=$MODEL_DIR --num_train_steps=$NUM_TRAIN_STEPS \
--sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
--pipeline_config_path=$PIPELINE_CONFIG_PATH \
--alsologtostderr
"""
from absl import flags
import tensorflow.compat.v2 as tf
from object_detection import model_lib_v2
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_bool('eval_on_train_data', False, 'Enable evaluating on train '
'data (only supported in distributed training).')
flags.DEFINE_integer('sample_1_of_n_eval_examples', None, 'Will sample one of '
'every n eval input examples, where n is provided.')
flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string(
'model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
flags.DEFINE_string(
'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
'`checkpoint_dir` is provided, this binary operates in eval-only mode, '
'writing resulting metrics to `model_dir`.')
flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an '
                     'evaluation checkpoint before exiting.')
flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_integer(
'num_workers', 1, 'When num_workers > 1, training uses '
'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
'MirroredStrategy.')
FLAGS = flags.FLAGS
def main(unused_argv):
flags.mark_flag_as_required('model_dir')
flags.mark_flag_as_required('pipeline_config_path')
tf.config.set_soft_device_placement(True)
if FLAGS.checkpoint_dir:
model_lib_v2.eval_continuously(
pipeline_config_path=FLAGS.pipeline_config_path,
model_dir=FLAGS.model_dir,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
sample_1_of_n_eval_on_train_examples=(
FLAGS.sample_1_of_n_eval_on_train_examples),
checkpoint_dir=FLAGS.checkpoint_dir,
wait_interval=300, timeout=FLAGS.eval_timeout)
else:
if FLAGS.use_tpu:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
elif FLAGS.num_workers > 1:
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
else:
strategy = tf.compat.v2.distribute.MirroredStrategy()
with strategy.scope():
model_lib_v2.train_loop(
pipeline_config_path=FLAGS.pipeline_config_path,
model_dir=FLAGS.model_dir,
train_steps=FLAGS.num_train_steps,
use_tpu=FLAGS.use_tpu)
if __name__ == '__main__':
tf.compat.v1.app.run()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to generate bidirectional feature pyramids based on image features.
Provides bidirectional feature pyramid network (BiFPN) generators that can be
used to build object detection feature extractors, as proposed by Tan et al.
See https://arxiv.org/abs/1911.09070 for more details.
"""
import collections
import functools
from six.moves import range
from six.moves import zip
import tensorflow as tf
from object_detection.utils import bifpn_utils
def _create_bifpn_input_config(fpn_min_level,
fpn_max_level,
input_max_level,
level_scales=None):
"""Creates a BiFPN input config for the input levels from a backbone network.
Args:
fpn_min_level: the minimum pyramid level (highest feature map resolution) to
use in the BiFPN.
fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
use in the BiFPN.
input_max_level: the maximum pyramid level that will be provided as input to
the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
from input_max_level, up to the desired fpn_max_level.
level_scales: a list of pyramid level scale factors. If 'None', each level's
scale is set to 2^level by default, which corresponds to each successive
feature map scaling by a factor of 2.
Returns:
A list of dictionaries for each feature map expected as input to the BiFPN,
where each has entries for the feature map 'name' and 'scale'.
"""
if not level_scales:
level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]
bifpn_input_params = []
for i in range(fpn_min_level, min(fpn_max_level, input_max_level) + 1):
bifpn_input_params.append({
'name': '0_up_lvl_{}'.format(i),
'scale': level_scales[i - fpn_min_level]
})
return bifpn_input_params
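# A minimal illustrative sketch (not part of the library): for a BiFPN
# spanning levels 3-7 with backbone features available up to level 5, the
# input config covers only the backbone-provided levels:
#   _create_bifpn_input_config(fpn_min_level=3, fpn_max_level=7,
#                              input_max_level=5)
#   # => [{'name': '0_up_lvl_3', 'scale': 8},
#   #     {'name': '0_up_lvl_4', 'scale': 16},
#   #     {'name': '0_up_lvl_5', 'scale': 32}]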
def _get_bifpn_output_node_names(fpn_min_level, fpn_max_level, node_config):
"""Returns a list of BiFPN output node names, given a BiFPN node config.
Args:
fpn_min_level: the minimum pyramid level (highest feature map resolution)
used by the BiFPN.
fpn_max_level: the maximum pyramid level (lowest feature map resolution)
used by the BiFPN.
node_config: the BiFPN node_config, a list of dictionaries corresponding to
each node in the BiFPN computation graph, where each entry should have an
associated 'name'.
Returns:
A list of strings corresponding to the names of the output BiFPN nodes.
"""
num_output_nodes = fpn_max_level - fpn_min_level + 1
return [node['name'] for node in node_config[-num_output_nodes:]]
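# Illustrative sketch: with fpn_min_level=3 and fpn_max_level=5 the output
# nodes are the last three entries of the node config, e.g. for a
# one-iteration BiFPN (see the node-config sketch below):
#   _get_bifpn_output_node_names(3, 5, node_config)
#   # => ['1_dn_lvl_3', '1_up_lvl_4', '1_up_lvl_5']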
def _create_bifpn_node_config(bifpn_num_iterations,
bifpn_num_filters,
fpn_min_level,
fpn_max_level,
input_max_level,
bifpn_node_params=None,
level_scales=None):
"""Creates a config specifying a bidirectional feature pyramid network.
Args:
bifpn_num_iterations: the number of top-down bottom-up feature computations
to repeat in the BiFPN.
bifpn_num_filters: the number of filters (channels) for every feature map
used in the BiFPN.
fpn_min_level: the minimum pyramid level (highest feature map resolution) to
use in the BiFPN.
fpn_max_level: the maximum pyramid level (lowest feature map resolution) to
use in the BiFPN.
input_max_level: the maximum pyramid level that will be provided as input to
the BiFPN. Accordingly, the BiFPN will compute additional pyramid levels
from input_max_level, up to the desired fpn_max_level.
bifpn_node_params: If not 'None', a dictionary of additional default BiFPN
node parameters that will be applied to all BiFPN nodes.
level_scales: a list of pyramid level scale factors. If 'None', each level's
scale is set to 2^level by default, which corresponds to each successive
feature map scaling by a factor of 2.
Returns:
A list of dictionaries used to define nodes in the BiFPN computation graph,
as proposed by EfficientDet, Tan et al (https://arxiv.org/abs/1911.09070).
Each node's entry has the corresponding keys:
name: String. The name of this node in the BiFPN. The node name follows
the format '{bifpn_iteration}_{dn|up}_lvl_{pyramid_level}', where 'dn'
or 'up' refers to whether the node is in the top-down or bottom-up
portion of a single BiFPN iteration.
scale: the scale factor for this node, by default 2^level.
inputs: A list of names of nodes which are inputs to this node.
num_channels: The number of channels for this node.
      combine_method: String. Name of the method used to combine input
        node feature maps; defaults to 'fast_attention' for nodes with more
        than one input, and is 'None' for nodes with a single input.
input_op: A (partial) function which is called to construct the layers
that will be applied to this BiFPN node's inputs. This function is
called with the arguments:
input_op(name, input_scale, input_num_channels, output_scale,
output_num_channels, conv_hyperparams, is_training,
freeze_batchnorm)
post_combine_op: A (partial) function which is called to construct the
layers that will be applied to the result of the combine operation for
this BiFPN node. This function will be called with the arguments:
post_combine_op(name, conv_hyperparams, is_training, freeze_batchnorm)
If 'None', then no layers will be applied after the combine operation
for this node.
"""
if not level_scales:
level_scales = [2**i for i in range(fpn_min_level, fpn_max_level + 1)]
default_node_params = {
'num_channels':
bifpn_num_filters,
'combine_method':
'fast_attention',
'input_op':
functools.partial(
_create_bifpn_resample_block, downsample_method='max_pooling'),
'post_combine_op':
functools.partial(
bifpn_utils.create_conv_block,
num_filters=bifpn_num_filters,
kernel_size=3,
strides=1,
padding='SAME',
use_separable=True,
apply_batchnorm=True,
apply_activation=True,
conv_bn_act_pattern=False),
}
if bifpn_node_params:
default_node_params.update(bifpn_node_params)
bifpn_node_params = []
# Create additional base pyramid levels not provided as input to the BiFPN.
# Note, combine_method and post_combine_op are set to None for additional
# base pyramid levels because they do not combine multiple input BiFPN nodes.
for i in range(input_max_level + 1, fpn_max_level + 1):
node_params = dict(default_node_params)
node_params.update({
'name': '0_up_lvl_{}'.format(i),
'scale': level_scales[i - fpn_min_level],
'inputs': ['0_up_lvl_{}'.format(i - 1)],
'combine_method': None,
'post_combine_op': None,
})
bifpn_node_params.append(node_params)
for i in range(bifpn_num_iterations):
# The first bottom-up feature pyramid (which includes the input pyramid
# levels from the backbone network and the additional base pyramid levels)
# is indexed at 0. So, the first top-down bottom-up pass of the BiFPN is
# indexed from 1, and repeated for bifpn_num_iterations iterations.
bifpn_i = i + 1
# Create top-down nodes.
for level_i in reversed(range(fpn_min_level, fpn_max_level)):
inputs = []
# BiFPN nodes in the top-down pass receive input from the corresponding
# level from the previous BiFPN iteration's bottom-up pass, except for the
# bottom-most (min) level node, which is computed once in the initial
# bottom-up pass, and is afterwards only computed in each top-down pass.
if level_i > fpn_min_level or bifpn_i == 1:
inputs.append('{}_up_lvl_{}'.format(bifpn_i - 1, level_i))
else:
inputs.append('{}_dn_lvl_{}'.format(bifpn_i - 1, level_i))
inputs.append(bifpn_node_params[-1]['name'])
node_params = dict(default_node_params)
node_params.update({
'name': '{}_dn_lvl_{}'.format(bifpn_i, level_i),
'scale': level_scales[level_i - fpn_min_level],
'inputs': inputs
})
bifpn_node_params.append(node_params)
# Create bottom-up nodes.
for level_i in range(fpn_min_level + 1, fpn_max_level + 1):
# BiFPN nodes in the bottom-up pass receive input from the corresponding
# level from the preceding top-down pass, except for the top (max) level
# which does not have a corresponding node in the top-down pass.
inputs = ['{}_up_lvl_{}'.format(bifpn_i - 1, level_i)]
if level_i < fpn_max_level:
inputs.append('{}_dn_lvl_{}'.format(bifpn_i, level_i))
inputs.append(bifpn_node_params[-1]['name'])
node_params = dict(default_node_params)
node_params.update({
'name': '{}_up_lvl_{}'.format(bifpn_i, level_i),
'scale': level_scales[level_i - fpn_min_level],
'inputs': inputs
})
bifpn_node_params.append(node_params)
return bifpn_node_params
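# Illustrative sketch of the node ordering this function produces (values are
# assumptions for illustration): a one-iteration BiFPN over levels 3-5, with
# backbone inputs up to level 4, yields one extra base level followed by a
# top-down then a bottom-up pass:
#   config = _create_bifpn_node_config(
#       bifpn_num_iterations=1, bifpn_num_filters=64,
#       fpn_min_level=3, fpn_max_level=5, input_max_level=4)
#   # [node['name'] for node in config]
#   # => ['0_up_lvl_5', '1_dn_lvl_4', '1_dn_lvl_3', '1_up_lvl_4', '1_up_lvl_5']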
def _create_bifpn_resample_block(name,
input_scale,
input_num_channels,
output_scale,
output_num_channels,
conv_hyperparams,
is_training,
freeze_batchnorm,
downsample_method=None,
use_native_resize_op=False,
maybe_apply_1x1_conv=True,
apply_1x1_pre_sampling=True,
apply_1x1_post_sampling=False):
"""Creates resample block layers for input feature maps to BiFPN nodes.
Args:
name: String. Name used for this block of layers.
input_scale: Scale factor of the input feature map.
input_num_channels: Number of channels in the input feature map.
output_scale: Scale factor of the output feature map.
output_num_channels: Number of channels in the output feature map.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
is_training: Indicates whether the feature generator is in training mode.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
downsample_method: String. Method to use when downsampling feature maps.
    use_native_resize_op: Bool. Whether to use the native resize op when
      upsampling feature maps.
maybe_apply_1x1_conv: Bool. If 'True', a 1x1 convolution will only be
applied if the input_num_channels differs from the output_num_channels.
apply_1x1_pre_sampling: Bool. Whether a 1x1 convolution will be applied to
the input feature map before the up/down-sampling operation.
apply_1x1_post_sampling: Bool. Whether a 1x1 convolution will be applied to
the input feature map after the up/down-sampling operation.
Returns:
A list of layers which may be applied to the input feature maps in order to
compute feature maps with the specified scale and number of channels.
"""
# By default, 1x1 convolutions are only applied before sampling when the
# number of input and output channels differ.
if maybe_apply_1x1_conv and output_num_channels == input_num_channels:
apply_1x1_pre_sampling = False
apply_1x1_post_sampling = False
apply_bn_for_resampling = True
layers = []
if apply_1x1_pre_sampling:
layers.extend(
bifpn_utils.create_conv_block(
name=name + '1x1_pre_sample/',
num_filters=output_num_channels,
kernel_size=1,
strides=1,
padding='SAME',
use_separable=False,
apply_batchnorm=apply_bn_for_resampling,
apply_activation=False,
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm))
layers.extend(
bifpn_utils.create_resample_feature_map_ops(input_scale, output_scale,
downsample_method,
use_native_resize_op,
conv_hyperparams, is_training,
freeze_batchnorm, name))
if apply_1x1_post_sampling:
layers.extend(
bifpn_utils.create_conv_block(
name=name + '1x1_post_sample/',
num_filters=output_num_channels,
kernel_size=1,
strides=1,
padding='SAME',
use_separable=False,
apply_batchnorm=apply_bn_for_resampling,
apply_activation=False,
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm))
return layers
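# Illustrative sketch: when the input feature map already has the desired
# number of channels (and maybe_apply_1x1_conv is True), both 1x1 conv blocks
# are skipped and only the resampling ops are returned. Here conv_hyperparams
# is assumed to be a hyperparams_builder.KerasLayerHyperparams instance:
#   layers = _create_bifpn_resample_block(
#       name='example/', input_scale=8, input_num_channels=64,
#       output_scale=16, output_num_channels=64,
#       conv_hyperparams=conv_hyperparams, is_training=True,
#       freeze_batchnorm=False, downsample_method='max_pooling')
#   # => only the ops from create_resample_feature_map_ops (2x downsampling).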
def _create_bifpn_combine_op(num_inputs, name, combine_method):
"""Creates a BiFPN output config, a list of the output BiFPN node names.
Args:
num_inputs: The number of inputs to this combine operation.
name: String. The name of this combine operation.
combine_method: String. The method used to combine input feature maps.
Returns:
A function which may be called with a list of num_inputs feature maps
and which will return a single feature map.
"""
combine_op = None
if num_inputs < 1:
raise ValueError('Expected at least 1 input for BiFPN combine.')
elif num_inputs == 1:
combine_op = lambda x: x[0]
else:
combine_op = bifpn_utils.BiFPNCombineLayer(
combine_method=combine_method, name=name)
return combine_op
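# Illustrative sketch: single-input nodes are combined with an identity-style
# op, while multi-input nodes use a BiFPNCombineLayer:
#   _create_bifpn_combine_op(1, 'node/combine', None)
#   # => lambda x: x[0]
#   _create_bifpn_combine_op(2, 'node/combine', 'fast_attention')
#   # => bifpn_utils.BiFPNCombineLayer instance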
class KerasBiFpnFeatureMaps(tf.keras.Model):
"""Generates Keras based BiFPN feature maps from an input feature map pyramid.
A Keras model that generates multi-scale feature maps for detection by
iteratively computing top-down and bottom-up feature pyramids, as in the
EfficientDet paper by Tan et al, see arxiv.org/abs/1911.09070 for details.
"""
def __init__(self,
bifpn_num_iterations,
bifpn_num_filters,
fpn_min_level,
fpn_max_level,
input_max_level,
is_training,
conv_hyperparams,
freeze_batchnorm,
bifpn_node_params=None,
name=None):
"""Constructor.
Args:
bifpn_num_iterations: The number of top-down bottom-up iterations.
bifpn_num_filters: The number of filters (channels) to be used for all
feature maps in this BiFPN.
fpn_min_level: The minimum pyramid level (highest feature map resolution)
to use in the BiFPN.
fpn_max_level: The maximum pyramid level (lowest feature map resolution)
to use in the BiFPN.
input_max_level: The maximum pyramid level that will be provided as input
to the BiFPN. Accordingly, the BiFPN will compute any additional pyramid
levels from input_max_level up to the desired fpn_max_level, with each
        successive level downsampling by a scale factor of 2 by default.
is_training: Indicates whether the feature generator is in training mode.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
bifpn_node_params: An optional dictionary that may be used to specify
default parameters for BiFPN nodes, without the need to provide a custom
bifpn_node_config. For example, if '{ combine_method: 'sum' }', then all
BiFPN nodes will combine input feature maps by summation, rather than
by the default fast attention method.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(KerasBiFpnFeatureMaps, self).__init__(name=name)
bifpn_node_config = _create_bifpn_node_config(
bifpn_num_iterations, bifpn_num_filters, fpn_min_level, fpn_max_level,
input_max_level, bifpn_node_params)
bifpn_input_config = _create_bifpn_input_config(
fpn_min_level, fpn_max_level, input_max_level)
bifpn_output_node_names = _get_bifpn_output_node_names(
fpn_min_level, fpn_max_level, bifpn_node_config)
self.bifpn_node_config = bifpn_node_config
self.bifpn_output_node_names = bifpn_output_node_names
self.node_input_blocks = []
self.node_combine_op = []
self.node_post_combine_block = []
all_node_params = bifpn_input_config
all_node_names = [node['name'] for node in all_node_params]
for node_config in bifpn_node_config:
# Maybe transform and/or resample input feature maps.
input_blocks = []
for input_name in node_config['inputs']:
if input_name not in all_node_names:
raise ValueError(
            'Input feature map ({}) does not exist.'.format(input_name))
input_index = all_node_names.index(input_name)
input_params = all_node_params[input_index]
input_block = node_config['input_op'](
name='{}/input_{}/'.format(node_config['name'], input_name),
input_scale=input_params['scale'],
input_num_channels=input_params.get('num_channels', None),
output_scale=node_config['scale'],
output_num_channels=node_config['num_channels'],
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm)
input_blocks.append((input_index, input_block))
# Combine input feature maps.
combine_op = _create_bifpn_combine_op(
num_inputs=len(input_blocks),
name=(node_config['name'] + '/combine'),
combine_method=node_config['combine_method'])
# Post-combine layers.
post_combine_block = []
if node_config['post_combine_op']:
post_combine_block.extend(node_config['post_combine_op'](
name=node_config['name'] + '/post_combine/',
conv_hyperparams=conv_hyperparams,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm))
self.node_input_blocks.append(input_blocks)
self.node_combine_op.append(combine_op)
self.node_post_combine_block.append(post_combine_block)
all_node_params.append(node_config)
all_node_names.append(node_config['name'])
def call(self, feature_pyramid):
"""Compute BiFPN feature maps from input feature pyramid.
Executed when calling the `.__call__` method on input.
Args:
feature_pyramid: list of tuples of (tensor_name, image_feature_tensor).
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
feature_maps = [el[1] for el in feature_pyramid]
output_feature_maps = [None for node in self.bifpn_output_node_names]
for index, node in enumerate(self.bifpn_node_config):
node_scope = 'node_{:02d}'.format(index)
with tf.name_scope(node_scope):
# Apply layer blocks to this node's input feature maps.
input_block_results = []
for input_index, input_block in self.node_input_blocks[index]:
block_result = feature_maps[input_index]
for layer in input_block:
block_result = layer(block_result)
input_block_results.append(block_result)
# Combine the resulting feature maps.
node_result = self.node_combine_op[index](input_block_results)
# Apply post-combine layer block if applicable.
for layer in self.node_post_combine_block[index]:
node_result = layer(node_result)
feature_maps.append(node_result)
if node['name'] in self.bifpn_output_node_names:
index = self.bifpn_output_node_names.index(node['name'])
output_feature_maps[index] = node_result
return collections.OrderedDict(
zip(self.bifpn_output_node_names, output_feature_maps))
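# A minimal usage sketch (mirroring the unit tests below; conv_hyperparams is
# assumed to be a hyperparams_builder.KerasLayerHyperparams instance and
# p3/p4/p5 are backbone feature tensors for levels 3-5):
#   bifpn = KerasBiFpnFeatureMaps(
#       bifpn_num_iterations=2, bifpn_num_filters=128,
#       fpn_min_level=3, fpn_max_level=7, input_max_level=5,
#       is_training=True, conv_hyperparams=conv_hyperparams,
#       freeze_batchnorm=False, name='bifpn')
#   feature_maps = bifpn([('block3', p3), ('block4', p4), ('block5', p5)])
#   # => OrderedDict keyed by ['2_dn_lvl_3', '2_up_lvl_4', ..., '2_up_lvl_7']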
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for bidirectional feature pyramid generators."""
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
@parameterized.parameters({'bifpn_num_iterations': 2},
{'bifpn_num_iterations': 8})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class BiFPNFeaturePyramidGeneratorTest(test_case.TestCase):
def _build_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
force_use_bias: true
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def test_get_expected_feature_map_shapes(self, bifpn_num_iterations):
with test_utils.GraphContextOrNone() as g:
image_features = [
('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
]
bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=128,
fpn_min_level=3,
fpn_max_level=7,
input_max_level=5,
is_training=True,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False)
def graph_fn():
feature_maps = bifpn_generator(image_features)
return feature_maps
expected_feature_map_shapes = {
'{}_dn_lvl_3'.format(bifpn_num_iterations): (4, 16, 16, 128),
'{}_up_lvl_4'.format(bifpn_num_iterations): (4, 8, 8, 128),
'{}_up_lvl_5'.format(bifpn_num_iterations): (4, 4, 4, 128),
'{}_up_lvl_6'.format(bifpn_num_iterations): (4, 2, 2, 128),
'{}_up_lvl_7'.format(bifpn_num_iterations): (4, 1, 1, 128)}
out_feature_maps = self.execute(graph_fn, [], g)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def test_get_expected_variable_names(self, bifpn_num_iterations):
with test_utils.GraphContextOrNone() as g:
image_features = [
('block3', tf.random_uniform([4, 16, 16, 256], dtype=tf.float32)),
('block4', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
('block5', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32))
]
bifpn_generator = bifpn_generators.KerasBiFpnFeatureMaps(
bifpn_num_iterations=bifpn_num_iterations,
bifpn_num_filters=128,
fpn_min_level=3,
fpn_max_level=7,
input_max_level=5,
is_training=True,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
name='bifpn')
def graph_fn():
return bifpn_generator(image_features)
self.execute(graph_fn, [], g)
expected_variables = [
'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
'bifpn/node_00/0_up_lvl_6/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
'bifpn/node_03/1_dn_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel',
'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
'bifpn/node_04/1_dn_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/bias',
'bifpn/node_05/1_dn_lvl_3/input_0_up_lvl_3/1x1_pre_sample/conv/kernel',
'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/bias',
'bifpn/node_06/1_up_lvl_4/input_0_up_lvl_4/1x1_pre_sample/conv/kernel',
'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/bias',
'bifpn/node_07/1_up_lvl_5/input_0_up_lvl_5/1x1_pre_sample/conv/kernel']
expected_node_variable_patterns = [
['bifpn/node_{:02}/{}_dn_lvl_6/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_6/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_dn_lvl_5/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_5/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_dn_lvl_4/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_4/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_dn_lvl_3/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_dn_lvl_3/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_4/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_4/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_5/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_5/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_6/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_6/post_combine/separable_conv/pointwise_kernel'],
['bifpn/node_{:02}/{}_up_lvl_7/combine/bifpn_combine_weights',
'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/bias',
'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/depthwise_kernel',
'bifpn/node_{:02}/{}_up_lvl_7/post_combine/separable_conv/pointwise_kernel']]
node_i = 2
for iter_i in range(1, bifpn_num_iterations+1):
for node_variable_patterns in expected_node_variable_patterns:
for pattern in node_variable_patterns:
expected_variables.append(pattern.format(node_i, iter_i))
node_i += 1
expected_variables = set(expected_variables)
actual_variable_set = set(
[var.name.split(':')[0] for var in bifpn_generator.variables])
self.assertSetEqual(expected_variables, actual_variable_set)
# TODO(aom): Tests for create_bifpn_combine_op.
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hourglass[1] feature extractor for CenterNet[2] meta architecture.
[1]: https://arxiv.org/abs/1603.06937
[2]: https://arxiv.org/abs/1904.07850
"""
from object_detection.meta_architectures import center_net_meta_arch
from object_detection.models.keras_models import hourglass_network
class CenterNetHourglassFeatureExtractor(
center_net_meta_arch.CenterNetFeatureExtractor):
"""The hourglass feature extractor for CenterNet.
This class is a thin wrapper around the HourglassFeatureExtractor class
along with some preprocessing methods inherited from the base class.
"""
def __init__(self, hourglass_net, channel_means=(0., 0., 0.),
channel_stds=(1., 1., 1.), bgr_ordering=False):
"""Intializes the feature extractor.
Args:
hourglass_net: The underlying hourglass network to use.
channel_means: A tuple of floats, denoting the mean of each channel
which will be subtracted from it.
channel_stds: A tuple of floats, denoting the standard deviation of each
channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, green, red] order.
"""
super(CenterNetHourglassFeatureExtractor, self).__init__(
channel_means=channel_means, channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
self._network = hourglass_net
def call(self, inputs):
return self._network(inputs)
@property
def out_stride(self):
"""The stride in the output image of the network."""
return 4
@property
def num_feature_outputs(self):
"""Ther number of feature outputs returned by the feature extractor."""
return self._network.num_hourglasses
def get_model(self):
return self._network
def hourglass_104(channel_means, channel_stds, bgr_ordering):
"""The Hourglass-104 backbone for CenterNet."""
network = hourglass_network.hourglass_104()
return CenterNetHourglassFeatureExtractor(
network, channel_means=channel_means, channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
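# A minimal usage sketch (shapes here are assumptions for illustration): the
# extractor returns one feature map per hourglass at out_stride 4.
#   extractor = hourglass_104(
#       channel_means=(0., 0., 0.), channel_stds=(1., 1., 1.),
#       bgr_ordering=False)
#   features = extractor(tf.zeros((1, 512, 512, 3)))
#   # => a list of num_feature_outputs tensors with spatial size 128x128.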
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,25 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing hourglass feature extractor for CenterNet."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
"""Mock objects and related functions for testing."""
from object_detection.models import center_net_hourglass_feature_extractor as hourglass
from object_detection.models.keras_models import hourglass_network
from object_detection.utils import test_case
from object_detection.utils import tf_version
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetHourglassFeatureExtractorTest(test_case.TestCase):
class MockBenchmarkLogger(object):
"""This is a mock logger that can be used in dependent tests."""
def test_center_net_hourglass_feature_extractor(self):
def __init__(self):
self.logged_metric = []
net = hourglass_network.HourglassNetwork(
num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6],
channel_dims=[4, 6, 8, 10, 12, 14], num_hourglasses=2)
def log_metric(self, name, value, unit=None, global_step=None,
extras=None):
self.logged_metric.append({
"name": name,
"value": float(value),
"unit": unit,
"global_step": global_step,
"extras": extras})
model = hourglass.CenterNetHourglassFeatureExtractor(net)
def graph_fn():
return model(tf.zeros((2, 64, 64, 3), dtype=np.float32))
outputs = self.execute(graph_fn, [])
self.assertEqual(outputs[0].shape, (2, 16, 16, 6))
self.assertEqual(outputs[1].shape, (2, 16, 16, 6))
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnetv2 based feature extractors for CenterNet[1] meta architecture.
[1]: https://arxiv.org/abs/1904.07850
"""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor):
"""Resnet v2 base feature extractor for the CenterNet model."""
def __init__(self, resnet_type, channel_means=(0., 0., 0.),
channel_stds=(1., 1., 1.), bgr_ordering=False):
"""Initializes the feature extractor with a specific ResNet architecture.
Args:
resnet_type: A string specifying which kind of ResNet to use. Currently
only `resnet_v2_50` and `resnet_v2_101` are supported.
channel_means: A tuple of floats, denoting the mean of each channel
which will be subtracted from it.
channel_stds: A tuple of floats, denoting the standard deviation of each
channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, green, red] order.
"""
super(CenterNetResnetFeatureExtractor, self).__init__(
channel_means=channel_means, channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
if resnet_type == 'resnet_v2_101':
self._base_model = tf.keras.applications.ResNet101V2(weights=None)
output_layer = 'conv5_block3_out'
elif resnet_type == 'resnet_v2_50':
self._base_model = tf.keras.applications.ResNet50V2(weights=None)
output_layer = 'conv5_block3_out'
else:
raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
output_layer = self._base_model.get_layer(output_layer)
self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
outputs=output_layer.output)
resnet_output = self._resnet_model(self._base_model.input)
for num_filters in [256, 128, 64]:
      # TODO(vighneshb) This section has a few differences from the paper.
# Figure out how much of a performance impact they have.
# 1. We use a simple convolution instead of a deformable convolution
conv = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=3,
strides=1, padding='same')
resnet_output = conv(resnet_output)
resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
resnet_output = tf.keras.layers.ReLU()(resnet_output)
# 2. We use the default initialization for the convolution layers
# instead of initializing it to do bilinear upsampling.
conv_transpose = tf.keras.layers.Conv2DTranspose(filters=num_filters,
kernel_size=3, strides=2,
padding='same')
resnet_output = conv_transpose(resnet_output)
resnet_output = tf.keras.layers.BatchNormalization()(resnet_output)
resnet_output = tf.keras.layers.ReLU()(resnet_output)
self._feature_extractor_model = tf.keras.models.Model(
inputs=self._base_model.input, outputs=resnet_output)
def preprocess(self, resized_inputs):
"""Preprocess input images for the ResNet model.
    This scales images in the range [0, 255] to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float32 tensor.
Returns:
outputs: a [batch, height, width, channels] float32 tensor.
"""
resized_inputs = super(CenterNetResnetFeatureExtractor, self).preprocess(
resized_inputs)
return tf.keras.applications.resnet_v2.preprocess_input(resized_inputs)
def load_feature_extractor_weights(self, path):
self._base_model.load_weights(path)
def get_base_model(self):
"""Get base resnet model for inspection and testing."""
return self._base_model
def call(self, inputs):
"""Returns image features extracted by the backbone.
Args:
inputs: An image tensor of shape [batch_size, input_height,
input_width, 3]
Returns:
features_list: A list of length 1 containing a tensor of shape
[batch_size, input_height // 4, input_width // 4, 64] containing
the features extracted by the ResNet.
"""
return [self._feature_extractor_model(inputs)]
@property
def num_feature_outputs(self):
return 1
@property
def out_stride(self):
return 4
def resnet_v2_101(channel_means, channel_stds, bgr_ordering):
"""The ResNet v2 101 feature extractor."""
return CenterNetResnetFeatureExtractor(
resnet_type='resnet_v2_101',
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering
)
def resnet_v2_50(channel_means, channel_stds, bgr_ordering):
"""The ResNet v2 50 feature extractor."""
return CenterNetResnetFeatureExtractor(
resnet_type='resnet_v2_50',
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
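# A minimal usage sketch (mirroring the unit tests below): for a 224x224
# batch, the extractor produces the stride-4, 64-channel feature map.
#   extractor = resnet_v2_50(
#       channel_means=(0., 0., 0.), channel_stds=(1., 1., 1.),
#       bgr_ordering=False)
#   features = extractor(extractor.preprocess(tf.zeros((8, 224, 224, 3))))
#   # => [tensor of shape (8, 56, 56, 64)], since 224 / out_stride = 56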
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing ResNet v2 models for the CenterNet meta architecture."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import center_net_resnet_feature_extractor
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetResnetFeatureExtractorTest(test_case.TestCase):
def test_output_size(self):
"""Verify that shape of features returned by the backbone is correct."""
model = center_net_resnet_feature_extractor.\
CenterNetResnetFeatureExtractor('resnet_v2_101')
def graph_fn():
img = np.zeros((8, 224, 224, 3), dtype=np.float32)
processed_img = model.preprocess(img)
return model(processed_img)
outputs = self.execute(graph_fn, [])
self.assertEqual(outputs.shape, (8, 56, 56, 64))
def test_output_size_resnet50(self):
"""Verify that shape of features returned by the backbone is correct."""
model = center_net_resnet_feature_extractor.\
CenterNetResnetFeatureExtractor('resnet_v2_50')
def graph_fn():
img = np.zeros((8, 224, 224, 3), dtype=np.float32)
processed_img = model.preprocess(img)
return model(processed_img)
outputs = self.execute(graph_fn, [])
self.assertEqual(outputs.shape, (8, 56, 56, 64))
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnetv1 FPN [1] based feature extractors for CenterNet[2] meta architecture.
[1]: https://arxiv.org/abs/1612.03144.
[2]: https://arxiv.org/abs/1904.07850.
"""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
_RESNET_MODEL_OUTPUT_LAYERS = {
'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
'conv4_block6_out', 'conv5_block3_out'],
'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
'conv4_block23_out', 'conv5_block3_out'],
}
class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
"""Resnet v1 FPN base feature extractor for the CenterNet model.
This feature extractor uses residual skip connections and nearest neighbor
upsampling to produce an output feature map of stride 4, which has precise
localization information along with strong semantic information from the top
of the net. This design does not exactly follow the original FPN design,
specifically:
- Since only one output map is necessary for heatmap prediction (stride 4
output), the top-down feature maps can have different numbers of channels.
Specifically, the top down feature maps have the following sizes:
[h/4, w/4, 64], [h/8, w/8, 128], [h/16, w/16, 256], [h/32, w/32, 256].
- No additional coarse features are used after conv5_x.
"""
def __init__(self, resnet_type, channel_means=(0., 0., 0.),
channel_stds=(1., 1., 1.), bgr_ordering=False):
"""Initializes the feature extractor with a specific ResNet architecture.
Args:
resnet_type: A string specifying which kind of ResNet to use. Currently
only `resnet_v1_50` and `resnet_v1_101` are supported.
channel_means: A tuple of floats, denoting the mean of each channel
which will be subtracted from it.
channel_stds: A tuple of floats, denoting the standard deviation of each
channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, green, red] order.
"""
super(CenterNetResnetV1FpnFeatureExtractor, self).__init__(
channel_means=channel_means, channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
if resnet_type == 'resnet_v1_50':
self._base_model = tf.keras.applications.ResNet50(weights=None)
elif resnet_type == 'resnet_v1_101':
self._base_model = tf.keras.applications.ResNet101(weights=None)
else:
raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
outputs = [self._base_model.get_layer(output_layer_name).output
for output_layer_name in output_layers]
self._resnet_model = tf.keras.models.Model(inputs=self._base_model.input,
outputs=outputs)
resnet_outputs = self._resnet_model(self._base_model.input)
# Construct the top-down feature maps.
top_layer = resnet_outputs[-1]
residual_op = tf.keras.layers.Conv2D(filters=256, kernel_size=1,
strides=1, padding='same')
top_down = residual_op(top_layer)
num_filters_list = [256, 128, 64]
for i, num_filters in enumerate(num_filters_list):
level_ind = 2 - i
# Upsample.
upsample_op = tf.keras.layers.UpSampling2D(2, interpolation='nearest')
top_down = upsample_op(top_down)
# Residual (skip-connection) from bottom-up pathway.
residual_op = tf.keras.layers.Conv2D(filters=num_filters, kernel_size=1,
strides=1, padding='same')
residual = residual_op(resnet_outputs[level_ind])
# Merge.
top_down = top_down + residual
next_num_filters = num_filters_list[i+1] if i + 1 <= 2 else 64
conv = tf.keras.layers.Conv2D(filters=next_num_filters,
kernel_size=3, strides=1, padding='same')
top_down = conv(top_down)
top_down = tf.keras.layers.BatchNormalization()(top_down)
top_down = tf.keras.layers.ReLU()(top_down)
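    # Illustrative shape trace (assuming a 224x224 input to resnet_v1_50):
    # conv5 output 7x7x2048 -> 1x1 conv -> 7x7x256, then per loop iteration:
    #   upsample to 14x14, add 256-ch residual from conv4, 3x3 conv -> 14x14x128
    #   upsample to 28x28, add 128-ch residual from conv3, 3x3 conv -> 28x28x64
    #   upsample to 56x56, add 64-ch residual from conv2, 3x3 conv -> 56x56x64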
self._feature_extractor_model = tf.keras.models.Model(
inputs=self._base_model.input, outputs=top_down)
def preprocess(self, resized_inputs):
"""Preprocess input images for the ResNet model.
    This scales images in the range [0, 255] to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float32 tensor.
Returns:
outputs: a [batch, height, width, channels] float32 tensor.
"""
resized_inputs = super(
CenterNetResnetV1FpnFeatureExtractor, self).preprocess(resized_inputs)
return tf.keras.applications.resnet.preprocess_input(resized_inputs)
def load_feature_extractor_weights(self, path):
self._base_model.load_weights(path)
def get_base_model(self):
"""Get base resnet model for inspection and testing."""
return self._base_model
def call(self, inputs):
"""Returns image features extracted by the backbone.
Args:
inputs: An image tensor of shape [batch_size, input_height,
input_width, 3]
Returns:
features_list: A list of length 1 containing a tensor of shape
[batch_size, input_height // 4, input_width // 4, 64] containing
the features extracted by the ResNet.
"""
return [self._feature_extractor_model(inputs)]
@property
def num_feature_outputs(self):
return 1
@property
def out_stride(self):
return 4
def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering):
"""The ResNet v1 101 FPN feature extractor."""
return CenterNetResnetV1FpnFeatureExtractor(
resnet_type='resnet_v1_101',
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering
)
def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
"""The ResNet v1 50 FPN feature extractor."""
return CenterNetResnetV1FpnFeatureExtractor(
resnet_type='resnet_v1_50',
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
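# A minimal usage sketch (mirroring the unit test below): the FPN output for
# a 224x224 batch is the stride-4, 64-channel feature map.
#   extractor = resnet_v1_50_fpn(
#       channel_means=(0., 0., 0.), channel_stds=(1., 1., 1.),
#       bgr_ordering=False)
#   features = extractor(extractor.preprocess(tf.zeros((8, 224, 224, 3))))
#   # => [tensor of shape (8, 56, 56, 64)]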
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing ResNet v1 FPN models for the CenterNet meta architecture."""
import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,
parameterized.TestCase):
@parameterized.parameters(
{'resnet_type': 'resnet_v1_50'},
{'resnet_type': 'resnet_v1_101'},
)
def test_correct_output_size(self, resnet_type):
"""Verify that shape of features returned by the backbone is correct."""
model = center_net_resnet_v1_fpn_feature_extractor.\
CenterNetResnetV1FpnFeatureExtractor(resnet_type)
def graph_fn():
img = np.zeros((8, 224, 224, 3), dtype=np.float32)
processed_img = model.preprocess(img)
return model(processed_img)
self.assertEqual(self.execute(graph_fn, []).shape, (8, 56, 56, 64))
if __name__ == '__main__':
tf.test.main()
......@@ -14,13 +14,16 @@
# ==============================================================================
"""Tests for embedded_ssd_mobilenet_v1_feature_extractor."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import embedded_ssd_mobilenet_v1_feature_extractor
from object_detection.models import ssd_feature_extractor_test
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
......
......@@ -14,12 +14,14 @@
# ==============================================================================
"""Tests for models.faster_rcnn_inception_resnet_v2_feature_extractor."""
import unittest
import tensorflow.compat.v1 as tf
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class FasterRcnnInceptionResnetV2FeatureExtractorTest(tf.test.TestCase):
def _build_feature_extractor(self, first_stage_features_stride):
......