Commit 5a2cf36f authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into newavarecords

parents 258ddfc3 a829e648
@@ -20,8 +20,8 @@ from __future__ import print_function
 import functools
 import unittest
+from unittest import mock  # pylint: disable=g-importing-member
 from absl.testing import parameterized
-import mock
 import tensorflow.compat.v1 as tf
 import tf_slim as slim
@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -109,7 +109,6 @@ class FakeFasterRCNNKerasFeatureExtractor(
     ])
-@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):

   def _get_model(self, box_predictor, **common_kwargs):
@@ -363,8 +362,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)
     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
@@ -439,15 +439,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
             masks_are_class_agnostic=masks_are_class_agnostic,
             share_box_across_classes=share_box_across_classes), **common_kwargs)

+  @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
   @mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib')
-  def test_prediction_mock(self, mock_context_rcnn_lib):
-    """Mocks the context_rcnn_lib module to test the prediction.
+  def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1):
+    """Mocks the context_rcnn_lib_v1 module to test the prediction.

     Using mock object so that we can ensure compute_box_context_attention is
     called inside the prediction function.

     Args:
-      mock_context_rcnn_lib: mock module for the context_rcnn_lib.
+      mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1.
     """
     model = self._build_model(
         is_training=False,
@@ -456,7 +457,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         num_classes=42)
     mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
-    mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor
+    mock_context_rcnn_lib_v1.compute_box_context_attention.return_value = mock_tensor
     inputs_shape = (2, 20, 20, 3)
     inputs = tf.cast(
         tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
@@ -478,7 +479,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
     side_inputs = model.get_side_inputs(features)
     _ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
-    mock_context_rcnn_lib.compute_box_context_attention.assert_called_once()
+    mock_context_rcnn_lib_v1.compute_box_context_attention.assert_called_once()

   @parameterized.named_parameters(
       {'testcase_name': 'static_shapes', 'static_shapes': True},
@@ -517,7 +518,6 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
       }
       side_inputs = model.get_side_inputs(features)
       prediction_dict = model.predict(preprocessed_inputs, true_image_shapes,
                                       **side_inputs)
       return (prediction_dict['rpn_box_predictor_features'],
...
@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
+    features_to_crop = [features_to_crop]
+    num_levels = len(features_to_crop)
+    box_levels = None
+    if num_levels != 1:
+      # If there are multiple levels to select, get the box levels
+      box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
+                                          1.0/224, proposal_boxes_normalized)
     cropped_regions = self._flatten_first_two_dimensions(
         self._crop_and_resize_fn(
-            features_to_crop, proposal_boxes_normalized,
+            features_to_crop, proposal_boxes_normalized, box_levels,
             [self._initial_crop_size, self._initial_crop_size]))
     return self._maxpool_layer(cropped_regions)
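The new `box_levels` argument routes each proposal to a feature level when `features_to_crop` is a multi-level list. A minimal NumPy sketch of the standard FPN level-assignment heuristic (Lin et al. 2017, Eq. 1), which `ops.fpn_feature_levels` is assumed to implement in some equivalent form on normalized boxes; `fpn_level` below is a hypothetical illustration, not the library function:

import numpy as np

def fpn_level(box_height, box_width, k0=4, canonical_size=224,
              k_min=2, k_max=5):
  # FPN paper Eq. (1): k = floor(k0 + log2(sqrt(w * h) / 224)),
  # clipped to the available pyramid levels.
  k = int(np.floor(k0 + np.log2(np.sqrt(box_height * box_width) /
                                canonical_size)))
  return max(k_min, min(k_max, k))

# A 224x224 box maps to the canonical level; halving its side drops a level.
assert fpn_level(224, 224) == 4
assert fpn_level(112, 112) == 3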
@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
         image_shape[1], image_shape[2], check_range=False).get()
     flat_cropped_gt_mask = self._crop_and_resize_fn(
-        tf.expand_dims(flat_gt_masks, -1),
-        tf.expand_dims(flat_normalized_proposals, axis=1),
+        [tf.expand_dims(flat_gt_masks, -1)],
+        tf.expand_dims(flat_normalized_proposals, axis=1), None,
         [mask_height, mask_width])
     # Without stopping gradients into cropped groundtruth masks the
     # performance with 100-padded groundtruth masks when batch size > 1 is
@@ -2547,7 +2554,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     if second_stage_mask_loss is not None:
       mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
                               second_stage_mask_loss, name='mask_loss')
-      loss_dict[mask_loss.op.name] = mask_loss
+      loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
     return loss_dict

   def _get_mask_proposal_boxes_and_classes(
...
@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)
     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
...
@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     return eval_metric_ops


-def _check_mask_type_and_value(array_name, masks):
-  """Checks whether mask dtype is uint8 and the values are either 0 or 1."""
-  if masks.dtype != np.uint8:
-    raise ValueError('{} must be of type np.uint8. Found {}.'.format(
-        array_name, masks.dtype))
-  if np.any(np.logical_and(masks != 0, masks != 1)):
-    raise ValueError('{} elements can only be either 0 or 1.'.format(
-        array_name))
+def convert_masks_to_binary(masks):
+  """Converts masks to 0 or 1 and uint8 type."""
+  return (masks > 0).astype(np.uint8)


 class CocoKeypointEvaluator(CocoDetectionEvaluator):
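The replacement drops the strict dtype/value check in favor of silent binarization: any nonzero pixel now counts as foreground. A quick NumPy illustration of the same rule used by `convert_masks_to_binary` above, which also explains the `5 * np.ones` change in the test further down:

import numpy as np

masks = np.array([[0, 1], [5, 255]], dtype=np.uint8)
binary = (masks > 0).astype(np.uint8)  # same rule as convert_masks_to_binary
print(binary)  # -> [[0 1]
               #     [1 1]]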
@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
     groundtruth_instance_masks = groundtruth_dict[
         standard_fields.InputDataFields.groundtruth_instance_masks]
-    _check_mask_type_and_value(standard_fields.InputDataFields.
-                               groundtruth_instance_masks,
-                               groundtruth_instance_masks)
+    groundtruth_instance_masks = convert_masks_to_binary(
+        groundtruth_instance_masks)
     self._groundtruth_list.extend(
         coco_tools.
         ExportSingleImageGroundtruthToCoco(
@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
                        'are incompatible: {} vs {}'.format(
                            groundtruth_masks_shape,
                            detection_masks.shape))
-    _check_mask_type_and_value(standard_fields.DetectionResultFields.
-                               detection_masks,
-                               detection_masks)
+    detection_masks = convert_masks_to_binary(detection_masks)
     self._detection_masks_list.extend(
         coco_tools.ExportSingleImageDetectionMasksToCoco(
             image_id=image_id,
...
@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
         image_id='image3',
         detections_dict={
             standard_fields.DetectionResultFields.detection_boxes:
                 np.array([[25., 25., 50., 50.]]),
             standard_fields.DetectionResultFields.detection_scores:
                 np.array([.8]),
             standard_fields.DetectionResultFields.detection_classes:
                 np.array([1]),
             standard_fields.DetectionResultFields.detection_masks:
-                np.pad(np.ones([1, 25, 25], dtype=np.uint8),
-                       ((0, 0), (10, 10), (10, 10)), mode='constant')
+                # The value of 5 is equivalent to 1, since masks will be
+                # thresholded and binarized before evaluation.
+                np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
+                       ((0, 0), (10, 10), (10, 10)), mode='constant')
         })
     metrics = coco_evaluator.evaluate()
     self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
...
@@ -136,15 +136,15 @@ def build_groundtruth_dictionary(data, class_label_map):
   dictionary = {
       standard_fields.InputDataFields.groundtruth_boxes:
-          data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
+          data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(),
       standard_fields.InputDataFields.groundtruth_classes:
           data_location['LabelName'].map(lambda x: class_label_map[x]
-                                        ).as_matrix(),
+                                        ).to_numpy(),
       standard_fields.InputDataFields.groundtruth_group_of:
-          data_location['IsGroupOf'].as_matrix().astype(int),
+          data_location['IsGroupOf'].to_numpy().astype(int),
       standard_fields.InputDataFields.groundtruth_image_classes:
           data_labels['LabelName'].map(lambda x: class_label_map[x]
-                                      ).as_matrix(),
+                                      ).to_numpy(),
   }

   if 'Mask' in data_location:
@@ -179,9 +179,9 @@ def build_predictions_dictionary(data, class_label_map):
   """
   dictionary = {
       standard_fields.DetectionResultFields.detection_classes:
-          data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(),
+          data['LabelName'].map(lambda x: class_label_map[x]).to_numpy(),
       standard_fields.DetectionResultFields.detection_scores:
-          data['Score'].as_matrix()
+          data['Score'].to_numpy()
   }

   if 'Mask' in data:
@@ -192,6 +192,6 @@ def build_predictions_dictionary(data, class_label_map):
   else:
     dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
         'YMin', 'XMin', 'YMax', 'XMax'
-    ]].as_matrix()
+    ]].to_numpy()

   return dictionary
@@ -53,16 +53,16 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
   boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
   boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
-                                 'XMax1']].as_matrix()
-  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
+                                 'XMax1']].to_numpy()
+  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()

   labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
   labels['subject'] = data_boxes['LabelName1'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['object'] = data_boxes['LabelName2'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['relation'] = data_boxes['RelationshipLabel'].map(
-      lambda x: relationship_label_map[x]).as_matrix()
+      lambda x: relationship_label_map[x]).to_numpy()

   return {
       standard_fields.InputDataFields.groundtruth_boxes:
@@ -71,7 +71,7 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
           labels,
       standard_fields.InputDataFields.groundtruth_image_classes:
           data_labels['LabelName'].map(lambda x: class_label_map[x])
-          .as_matrix(),
+          .to_numpy(),
   }
@@ -104,16 +104,16 @@ def build_predictions_vrd_dictionary(data, class_label_map,
   boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
   boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
-                                 'XMax1']].as_matrix()
-  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
+                                 'XMax1']].to_numpy()
+  boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()

   labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
   labels['subject'] = data_boxes['LabelName1'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['object'] = data_boxes['LabelName2'].map(
-      lambda x: class_label_map[x]).as_matrix()
+      lambda x: class_label_map[x]).to_numpy()
   labels['relation'] = data_boxes['RelationshipLabel'].map(
-      lambda x: relationship_label_map[x]).as_matrix()
+      lambda x: relationship_label_map[x]).to_numpy()

   return {
       standard_fields.DetectionResultFields.detection_boxes:
@@ -121,5 +121,5 @@ def build_predictions_vrd_dictionary(data, class_label_map,
       standard_fields.DetectionResultFields.detection_classes:
           labels,
       standard_fields.DetectionResultFields.detection_scores:
-          data_boxes['Score'].as_matrix()
+          data_boxes['Score'].to_numpy()
   }
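The `as_matrix()` to `to_numpy()` changes track the pandas API: `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0, while `to_numpy()` (added in 0.24) is the documented replacement and returns the same ndarray at these call sites. For example:

import pandas as pd

df = pd.DataFrame({'YMin': [0.1], 'XMin': [0.2], 'YMax': [0.3], 'XMax': [0.4]})
boxes = df[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy()  # was .as_matrix()
print(boxes.shape)  # -> (1, 4)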
@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
 # pylint: disable=g-import-not-at-top
 try:
   from tensorflow.contrib import learn as contrib_learn
-  from tensorflow.contrib import tpu as contrib_tpu
 except ImportError:
   # TF 2.0 doesn't ship with contrib.
   pass
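With the contrib import gone, later hunks in this file swap each use for the graduated equivalent (the module imports `tensorflow.compat.v1 as tf`). The mapping applied in this diff:

# contrib_tpu.bfloat16_scope       -> tf.tpu.bfloat16_scope
# contrib_tpu.outside_compilation  -> tf.tpu.outside_compilation
# contrib_tpu.CrossShardOptimizer  -> tf.tpu.CrossShardOptimizer
# contrib_tpu.TPUEstimatorSpec     -> tf.estimator.tpu.TPUEstimatorSpec
# contrib_tpu.TPUEstimator         -> tf.estimator.tpu.TPUEstimator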
@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
       of groundtruth boxes per image.
     'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
       tensor of keypoints (if provided in groundtruth).
+    'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
+      with the number of DensePose points for each instance (if provided in
+      groundtruth).
+    'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
+      max_sampled_points] int32 tensor with the part ids for each DensePose
+      sampled point (if provided in groundtruth).
+    'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
+      max_sampled_points, 4] containing the DensePose surface coordinates for
+      each sampled point (if provided in groundtruth).
     'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
       group_of annotations (if provided in groundtruth).
     'groundtruth_labeled_classes': [batch_size, num_classes] int64
@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
     groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
         labeled_classes)

+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_num_points):
+    groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_num_points))
+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_part_ids):
+    groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_part_ids))
+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_surface_coords):
+    groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_surface_coords))
   groundtruth[input_data_fields.num_groundtruth_boxes] = (
       tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
   return groundtruth
@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
         fields.InputDataFields.groundtruth_boxes,
         fields.InputDataFields.groundtruth_keypoints,
         fields.InputDataFields.groundtruth_keypoint_visibilities,
+        fields.InputDataFields.groundtruth_dp_num_points,
+        fields.InputDataFields.groundtruth_dp_part_ids,
+        fields.InputDataFields.groundtruth_dp_surface_coords,
         fields.InputDataFields.groundtruth_group_of,
         fields.InputDataFields.groundtruth_difficult,
         fields.InputDataFields.groundtruth_is_crowd,
@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
   if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
     gt_keypoint_visibilities_list = labels[
         fields.InputDataFields.groundtruth_keypoint_visibilities]
+  gt_dp_num_points_list = None
+  if fields.InputDataFields.groundtruth_dp_num_points in labels:
+    gt_dp_num_points_list = labels[
+        fields.InputDataFields.groundtruth_dp_num_points]
+  gt_dp_part_ids_list = None
+  if fields.InputDataFields.groundtruth_dp_part_ids in labels:
+    gt_dp_part_ids_list = labels[
+        fields.InputDataFields.groundtruth_dp_part_ids]
+  gt_dp_surface_coords_list = None
+  if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
+    gt_dp_surface_coords_list = labels[
+        fields.InputDataFields.groundtruth_dp_surface_coords]
   gt_weights_list = None
   if fields.InputDataFields.groundtruth_weights in labels:
     gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
       groundtruth_masks_list=gt_masks_list,
       groundtruth_keypoints_list=gt_keypoints_list,
       groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
+      groundtruth_dp_num_points_list=gt_dp_num_points_list,
+      groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
+      groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
       groundtruth_weights_list=gt_weights_list,
       groundtruth_is_crowd_list=gt_is_crowd_list,
       groundtruth_group_of_list=gt_group_of_list,
       groundtruth_area_list=gt_area_list)


-def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
+def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
                     postprocess_on_cpu=False):
   """Creates a model function for `Estimator`.
@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     side_inputs = detection_model.get_side_inputs(features)
     if use_tpu and train_config.use_bfloat16:
-      with contrib_tpu.bfloat16_scope():
+      with tf.tpu.bfloat16_scope():
         prediction_dict = detection_model.predict(
             preprocessed_images,
             features[fields.InputDataFields.true_image_shape], **side_inputs)
@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
       if use_tpu and postprocess_on_cpu:
-        detections = contrib_tpu.outside_compilation(
+        detections = tf.tpu.outside_compilation(
             postprocess_wrapper,
             (prediction_dict,
              features[fields.InputDataFields.true_image_shape]))
@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     if mode == tf.estimator.ModeKeys.TRAIN:
       if use_tpu:
-        training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
+        training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)

       # Optionally freeze some layers by setting their gradients to be zero.
       trainable_variables = None
@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
     # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
     if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
-      return contrib_tpu.TPUEstimatorSpec(
+      return tf.estimator.tpu.TPUEstimatorSpec(
           mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
 def create_estimator_and_inputs(run_config,
-                                hparams,
-                                pipeline_config_path,
+                                hparams=None,
+                                pipeline_config_path=None,
                                 config_override=None,
                                 train_steps=None,
                                 sample_1_of_n_eval_examples=1,
@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
   Args:
     run_config: A `RunConfig`.
-    hparams: A `HParams`.
+    hparams: (optional) A `HParams`.
     pipeline_config_path: A path to a pipeline config file.
     config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
       override the config from `pipeline_config_path`.
@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
       model_config=model_config, predict_input_config=eval_input_configs[0])

   # Read export_to_tpu from hparams if not passed.
-  if export_to_tpu is None:
+  if export_to_tpu is None and hparams is not None:
     export_to_tpu = hparams.get('export_to_tpu', False)
   tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
                   use_tpu, export_to_tpu)
   model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
                               postprocess_on_cpu)
   if use_tpu_estimator:
-    estimator = contrib_tpu.TPUEstimator(
+    estimator = tf.estimator.tpu.TPUEstimator(
         model_fn=model_fn,
         train_batch_size=train_config.batch_size,
         # For each core, only batch size 1 is supported for eval.
...
@@ -23,6 +23,7 @@ import os
 import time

 import tensorflow.compat.v1 as tf
+import tensorflow.compat.v2 as tf2

 from object_detection import eval_util
 from object_detection import inputs
@@ -93,6 +94,12 @@ def _compute_losses_and_predictions_dicts(
         instance masks for objects.
       labels[fields.InputDataFields.groundtruth_keypoints] is a
         float32 tensor containing keypoints for each box.
+      labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
+        tensor with the number of sampled DensePose points per object.
+      labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
+        tensor with the DensePose part ids (0-indexed) per object.
+      labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+        float32 tensor with the DensePose surface coordinates.
       labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
         containing group_of annotations.
       labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
@@ -111,7 +118,8 @@ def _compute_losses_and_predictions_dicts(
   prediction_dict = model.predict(
       preprocessed_images,
-      features[fields.InputDataFields.true_image_shape])
+      features[fields.InputDataFields.true_image_shape],
+      **model.get_side_inputs(features))
   prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)

   losses_dict = model.loss(
@@ -195,6 +203,17 @@ def eager_train_step(detection_model,
       labels[fields.InputDataFields.groundtruth_keypoints] is a
         [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
         keypoints for each box.
+      labels[fields.InputDataFields.groundtruth_dp_num_points] is a
+        [batch_size, num_boxes] int32 tensor with the number of DensePose
+        sampled points per instance.
+      labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
+        [batch_size, num_boxes, max_sampled_points] int32 tensor with the
+        part ids (0-indexed) for each instance.
+      labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+        [batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
+        surface coordinates for each point. Each surface coordinate is of the
+        form (y, x, v, u) where (y, x) are normalized image locations and
+        (v, u) are part-relative normalized surface coordinates.
       labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
         k-hot tensor of classes.
     unpad_groundtruth_tensors: A parameter passed to unstack_batch.
@@ -336,11 +355,18 @@ def load_fine_tune_checkpoint(
       labels)

   strategy = tf.compat.v2.distribute.get_strategy()
-  strategy.experimental_run_v2(
-      _dummy_computation_fn, args=(
-          features,
-          labels,
-      ))
+  if hasattr(tf.distribute.Strategy, 'run'):
+    strategy.run(
+        _dummy_computation_fn, args=(
+            features,
+            labels,
+        ))
+  else:
+    strategy.experimental_run_v2(
+        _dummy_computation_fn, args=(
+            features,
+            labels,
+        ))

   restore_from_objects_dict = model.restore_from_objects(
       fine_tune_checkpoint_type=checkpoint_type)
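`Strategy.experimental_run_v2` was renamed to `Strategy.run` in TF 2.2, which is what the `hasattr` probe above detects. The same dispatch, extracted as a standalone sketch (a hypothetical helper for illustration, not part of this change):

import tensorflow.compat.v2 as tf

def run_on_strategy(strategy, fn, args):
  """Runs fn under the strategy, preferring the TF >= 2.2 `run` name."""
  if hasattr(tf.distribute.Strategy, 'run'):
    return strategy.run(fn, args=args)
  return strategy.experimental_run_v2(fn, args=args)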
@@ -391,6 +417,7 @@ def train_loop(
     save_final_config=False,
     checkpoint_every_n=1000,
     checkpoint_max_to_keep=7,
+    record_summaries=True,
     **kwargs):
   """Trains a model using eager + functions.
@@ -420,6 +447,7 @@ def train_loop(
       Checkpoint every n training steps.
     checkpoint_max_to_keep:
       int, the number of most recent checkpoints to keep in the model directory.
+    record_summaries: Boolean, whether or not to record summaries.
     **kwargs: Additional keyword arguments for configuration override.
   """
   ## Parse the configs
@@ -506,8 +534,11 @@ def train_loop(
   # is the chief.
   summary_writer_filepath = get_filepath(strategy,
                                          os.path.join(model_dir, 'train'))
-  summary_writer = tf.compat.v2.summary.create_file_writer(
-      summary_writer_filepath)
+  if record_summaries:
+    summary_writer = tf.compat.v2.summary.create_file_writer(
+        summary_writer_filepath)
+  else:
+    summary_writer = tf2.summary.create_noop_writer()

   if use_tpu:
     num_steps_per_iteration = 100
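With `record_summaries=False`, the no-op writer keeps downstream `tf.summary` calls valid without writing event files, so the training loop needs no further branching. A small sketch of the behavior:

import tensorflow.compat.v2 as tf2

writer = tf2.summary.create_noop_writer()
with writer.as_default():
  tf2.summary.scalar('loss', 0.5, step=1)  # silently discarded, no event file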
@@ -562,8 +593,12 @@ def train_loop(
     def _sample_and_train(strategy, train_step_fn, data_iterator):
       features, labels = data_iterator.next()
-      per_replica_losses = strategy.experimental_run_v2(
-          train_step_fn, args=(features, labels))
+      if hasattr(tf.distribute.Strategy, 'run'):
+        per_replica_losses = strategy.run(
+            train_step_fn, args=(features, labels))
+      else:
+        per_replica_losses = strategy.experimental_run_v2(
+            train_step_fn, args=(features, labels))
       # TODO(anjalisridhar): explore if it is safe to remove the
       ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
       return strategy.reduce(tf.distribute.ReduceOp.SUM,
@@ -575,7 +610,9 @@ def train_loop(
       if num_steps_per_iteration > 1:
         for _ in tf.range(num_steps_per_iteration - 1):
-          _sample_and_train(strategy, train_step_fn, data_iterator)
+          # Following suggestion on yaqs/5402607292645376
+          with tf.name_scope(''):
+            _sample_and_train(strategy, train_step_fn, data_iterator)

       return _sample_and_train(strategy, train_step_fn, data_iterator)
@@ -767,7 +804,16 @@ def eager_eval_loop(
             name='eval_side_by_side_' + str(i),
             step=global_step,
             data=sbys_images,
-            max_outputs=1)
+            max_outputs=eval_config.num_visualizations)
+        if eval_util.has_densepose(eval_dict):
+          dp_image_list = vutils.draw_densepose_visualizations(
+              eval_dict)
+          dp_images = tf.concat(dp_image_list, axis=0)
+          tf.compat.v2.summary.image(
+              name='densepose_detections_' + str(i),
+              step=global_step,
+              data=dp_images,
+              max_outputs=eval_config.num_visualizations)

   if evaluators is None:
     if class_agnostic:
@@ -817,6 +863,7 @@ def eval_continuously(
     checkpoint_dir=None,
     wait_interval=180,
     timeout=3600,
+    eval_index=None,
     **kwargs):
   """Run continuous evaluation of a detection model eagerly.
@@ -846,6 +893,8 @@ def eval_continuously(
       new checkpoint.
     timeout: The maximum number of seconds to wait for a checkpoint. Execution
       will terminate if no new checkpoints are found after these many seconds.
+    eval_index: int, optional. If given, only evaluate the dataset at the
+      given index.
     **kwargs: Additional keyword arguments for configuration override.
   """
@@ -899,6 +948,11 @@ def eval_continuously(
             model=detection_model)
     eval_inputs.append((eval_input_config.name, next_eval_input))

+  if eval_index is not None:
+    eval_inputs = [eval_inputs[eval_index]]
+    tf.logging.info('eval_index selected - {}'.format(
+        eval_inputs))
+
   global_step = tf.compat.v2.Variable(
       0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
...
@@ -22,7 +22,6 @@ from absl import flags
 import tensorflow.compat.v1 as tf

-from object_detection import model_hparams
 from object_detection import model_lib

 flags.DEFINE_string(
@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
                      'one of every n train input examples for evaluation, '
                      'where n is provided. This is only used if '
                      '`eval_training_data` is True.')
-flags.DEFINE_string(
-    'hparams_overrides', None, 'Hyperparameter overrides, '
-    'represented as a string containing comma-separated '
-    'hparam_name=value pairs.')
 flags.DEFINE_string(
     'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
     '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
@@ -68,7 +63,6 @@ def main(unused_argv):
   train_and_eval_dict = model_lib.create_estimator_and_inputs(
       run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
       sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
...
@@ -54,10 +54,19 @@ flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
                      'evaluation checkpoint before exiting.')

 flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
+flags.DEFINE_string(
+    'tpu_name',
+    default=None,
+    help='Name of the Cloud TPU for Cluster Resolvers.')
 flags.DEFINE_integer(
     'num_workers', 1, 'When num_workers > 1, training uses '
     'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
     'MirroredStrategy.')
+flags.DEFINE_integer(
+    'checkpoint_every_n', 1000, 'Integer defining how often we checkpoint.')
+flags.DEFINE_boolean('record_summaries', True,
+                     ('Whether or not to record summaries during'
+                      ' training.'))

 FLAGS = flags.FLAGS
@@ -79,7 +88,10 @@ def main(unused_argv):
         wait_interval=300, timeout=FLAGS.eval_timeout)
   else:
     if FLAGS.use_tpu:
-      resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
+      # TPU is automatically inferred if tpu_name is None and
+      # we are running under cloud ai-platform.
+      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
+          FLAGS.tpu_name)
       tf.config.experimental_connect_to_cluster(resolver)
       tf.tpu.experimental.initialize_tpu_system(resolver)
       strategy = tf.distribute.experimental.TPUStrategy(resolver)
@@ -93,7 +105,9 @@ def main(unused_argv):
         pipeline_config_path=FLAGS.pipeline_config_path,
         model_dir=FLAGS.model_dir,
         train_steps=FLAGS.num_train_steps,
-        use_tpu=FLAGS.use_tpu)
+        use_tpu=FLAGS.use_tpu,
+        checkpoint_every_n=FLAGS.checkpoint_every_n,
+        record_summaries=FLAGS.record_summaries)

 if __name__ == '__main__':
   tf.compat.v1.app.run()
@@ -26,18 +26,8 @@ from absl import flags
 import tensorflow.compat.v1 as tf

-from object_detection import model_hparams
 from object_detection import model_lib

-# pylint: disable=g-import-not-at-top
-try:
-  from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
-  from tensorflow.contrib import tpu as contrib_tpu
-except ImportError:
-  # TF 2.0 doesn't ship with contrib.
-  pass
-# pylint: enable=g-import-not-at-top
-
 tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

 # Cloud TPU Cluster Resolvers
@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
 flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
                      'this is not provided, batch size is read from training '
                      'config.')
-flags.DEFINE_string(
-    'hparams_overrides', None, 'Comma-separated list of '
-    'hyperparameters to override defaults.')
 flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
 flags.DEFINE_boolean('eval_training_data', False,
                      'If training data should be evaluated for this job.')
@@ -99,15 +85,15 @@ def main(unused_argv):
   flags.mark_flag_as_required('pipeline_config_path')

   tpu_cluster_resolver = (
-      contrib_cluster_resolver.TPUClusterResolver(
+      tf.distribute.cluster_resolver.TPUClusterResolver(
           tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
   tpu_grpc_url = tpu_cluster_resolver.get_master()

-  config = contrib_tpu.RunConfig(
+  config = tf.estimator.tpu.RunConfig(
       master=tpu_grpc_url,
       evaluation_master=tpu_grpc_url,
       model_dir=FLAGS.model_dir,
-      tpu_config=contrib_tpu.TPUConfig(
+      tpu_config=tf.estimator.tpu.TPUConfig(
           iterations_per_loop=FLAGS.iterations_per_loop,
           num_shards=FLAGS.num_shards))
@@ -117,7 +103,6 @@ def main(unused_argv):
   train_and_eval_dict = model_lib.create_estimator_and_inputs(
       run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
       sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
...
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MobileNet V2[1] feature extractor for CenterNet[2] meta architecture.
[1]: https://arxiv.org/abs/1801.04381
[2]: https://arxiv.org/abs/1904.07850
"""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import center_net_meta_arch
from object_detection.models.keras_models import mobilenet_v2 as mobilenetv2
class CenterNetMobileNetV2FeatureExtractor(
    center_net_meta_arch.CenterNetFeatureExtractor):
  """The MobileNet V2 feature extractor for CenterNet."""

  def __init__(self,
               mobilenet_v2_net,
               channel_means=(0., 0., 0.),
               channel_stds=(1., 1., 1.),
               bgr_ordering=False):
    """Initializes the feature extractor.

    Args:
      mobilenet_v2_net: The underlying mobilenet_v2 network to use.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
      bgr_ordering: bool, if set will change the channel ordering to be in the
        [blue, green, red] order.
    """

    super(CenterNetMobileNetV2FeatureExtractor, self).__init__(
        channel_means=channel_means,
        channel_stds=channel_stds,
        bgr_ordering=bgr_ordering)
    self._network = mobilenet_v2_net

    output = self._network(self._network.input)

    # TODO(nkhadke): Try out MobileNet+FPN next (skip connections are cheap and
    # should help with performance).
    # MobileNet by itself transforms a 224x224x3 volume into a 7x7x1280, which
    # leads to a stride of 32. We perform upsampling to get it to a target
    # stride of 4.
    for num_filters in [256, 128, 64]:
      # 1. We use a simple convolution instead of a deformable convolution
      conv = tf.keras.layers.Conv2D(
          filters=num_filters, kernel_size=1, strides=1, padding='same')
      output = conv(output)
      output = tf.keras.layers.BatchNormalization()(output)
      output = tf.keras.layers.ReLU()(output)

      # 2. We use the default initialization for the convolution layers
      # instead of initializing it to do bilinear upsampling.
      conv_transpose = tf.keras.layers.Conv2DTranspose(
          filters=num_filters, kernel_size=3, strides=2, padding='same')
      output = conv_transpose(output)
      output = tf.keras.layers.BatchNormalization()(output)
      output = tf.keras.layers.ReLU()(output)

    self._network = tf.keras.models.Model(
        inputs=self._network.input, outputs=output)

  def preprocess(self, resized_inputs):
    resized_inputs = super(CenterNetMobileNetV2FeatureExtractor,
                           self).preprocess(resized_inputs)
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized_inputs)

  def load_feature_extractor_weights(self, path):
    self._network.load_weights(path)

  def get_base_model(self):
    return self._network

  def call(self, inputs):
    return [self._network(inputs)]

  @property
  def out_stride(self):
    """The stride in the output image of the network."""
    return 4

  @property
  def num_feature_outputs(self):
    """The number of feature outputs returned by the feature extractor."""
    return 1

  def get_model(self):
    return self._network


def mobilenet_v2(channel_means, channel_stds, bgr_ordering):
  """The MobileNetV2 backbone for CenterNet."""
  # We set 'is_training' to True for now.
  network = mobilenetv2.mobilenet_v2(True, include_top=False)
  return CenterNetMobileNetV2FeatureExtractor(
      network,
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
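The stride arithmetic behind the upsampling head above: MobileNetV2's final feature map has stride 32, and each of the three stride-2 transpose convolutions halves it, 32 -> 16 -> 8 -> 4, matching `out_stride`. For a 224x224 input that is a 7x7x1280 backbone map upsampled to 56x56x64, which is exactly what the test below asserts:

# Sanity check of the stride bookkeeping (assumes a 224x224 input):
stride = 32                 # MobileNetV2 backbone output stride
for _ in range(3):          # three Conv2DTranspose layers with strides=2
  stride //= 2
assert stride == 4
assert 224 // stride == 56  # spatial size asserted in the test below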
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing mobilenet_v2 feature extractor for CenterNet."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models.keras_models import mobilenet_v2
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetMobileNetV2FeatureExtractorTest(test_case.TestCase):

  def test_center_net_mobilenet_v2_feature_extractor(self):
    net = mobilenet_v2.mobilenet_v2(True, include_top=False)
    model = center_net_mobilenet_v2_feature_extractor.CenterNetMobileNetV2FeatureExtractor(
        net)

    def graph_fn():
      img = np.zeros((8, 224, 224, 3), dtype=np.float32)
      processed_img = model.preprocess(img)
      return model(processed_img)

    outputs = self.execute(graph_fn, [])
    self.assertEqual(outputs.shape, (8, 56, 56, 64))


if __name__ == '__main__':
  tf.test.main()
@@ -21,9 +21,14 @@
 import tensorflow.compat.v1 as tf

 from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
+from object_detection.models.keras_models import resnet_v1


 _RESNET_MODEL_OUTPUT_LAYERS = {
+    'resnet_v1_18': ['conv2_block2_out', 'conv3_block2_out',
+                     'conv4_block2_out', 'conv5_block2_out'],
+    'resnet_v1_34': ['conv2_block3_out', 'conv3_block4_out',
+                     'conv4_block6_out', 'conv5_block3_out'],
     'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
                      'conv4_block6_out', 'conv5_block3_out'],
     'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
@@ -69,6 +74,10 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
       self._base_model = tf.keras.applications.ResNet50(weights=None)
     elif resnet_type == 'resnet_v1_101':
       self._base_model = tf.keras.applications.ResNet101(weights=None)
+    elif resnet_type == 'resnet_v1_18':
+      self._base_model = resnet_v1.resnet_v1_18(weights=None)
+    elif resnet_type == 'resnet_v1_34':
+      self._base_model = resnet_v1.resnet_v1_34(weights=None)
     else:
       raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
     output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
...@@ -174,3 +183,24 @@ def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering): ...@@ -174,3 +183,24 @@ def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
channel_means=channel_means, channel_means=channel_means,
channel_stds=channel_stds, channel_stds=channel_stds,
bgr_ordering=bgr_ordering) bgr_ordering=bgr_ordering)


def resnet_v1_34_fpn(channel_means, channel_stds, bgr_ordering):
  """The ResNet v1 34 FPN feature extractor."""
  return CenterNetResnetV1FpnFeatureExtractor(
      resnet_type='resnet_v1_34',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)


def resnet_v1_18_fpn(channel_means, channel_stds, bgr_ordering):
  """The ResNet v1 18 FPN feature extractor."""
  return CenterNetResnetV1FpnFeatureExtractor(
      resnet_type='resnet_v1_18',
      channel_means=channel_means,
      channel_stds=channel_stds,
      bgr_ordering=bgr_ordering)
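A hypothetical usage sketch of the new factories; the module path matches where this file lives in the Object Detection API, and the channel statistics below are the usual ImageNet RGB means rather than values taken from this change:

from object_detection.models import center_net_resnet_v1_fpn_feature_extractor as fe

# Builds the ResNet-18 FPN backbone wrapper added above; weights are
# randomly initialized (the extractor constructs the base model with
# weights=None).
extractor = fe.resnet_v1_18_fpn(
    channel_means=[123.68, 116.779, 103.939],  # assumed ImageNet means
    channel_stds=[1., 1., 1.],
    bgr_ordering=False)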
...@@ -31,6 +31,8 @@ class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,

  @parameterized.parameters(
      {'resnet_type': 'resnet_v1_50'},
      {'resnet_type': 'resnet_v1_101'},
      {'resnet_type': 'resnet_v1_18'},
      {'resnet_type': 'resnet_v1_34'},
  )
  def test_correct_output_size(self, resnet_type):
    """Verify that shape of features returned by the backbone is correct."""
...
...@@ -56,7 +56,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
        the resnet_v1.resnet_v1_{50,101,152} models.
      resnet_v1_base_model_name: model name under which to construct resnet v1.
      first_stage_features_stride: See base class.
      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
        containing convolution hyperparameters for the layers added on top of
        the base feature extractor.
      batch_norm_trainable: See base class.
...@@ -143,19 +143,21 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
    with tf.name_scope('ResnetV1FPN'):
      full_resnet_v1_model = self._resnet_v1_base_model(
          batchnorm_training=self._train_batch_norm,
          conv_hyperparams=(self._conv_hyperparams if
                            self._override_base_feature_extractor_hyperparams
                            else None),
          classes=None,
          weights=None,
          include_top=False)
      output_layers = _RESNET_MODEL_OUTPUT_LAYERS[
          self._resnet_v1_base_model_name]
      outputs = [full_resnet_v1_model.get_layer(output_layer_name).output
                 for output_layer_name in output_layers]
      self.classification_backbone = tf.keras.Model(
          inputs=full_resnet_v1_model.inputs,
          outputs=outputs)
      backbone_outputs = self.classification_backbone(
          full_resnet_v1_model.inputs)

      # construct FPN feature generator
      self._base_fpn_max_level = min(self._fpn_max_level, 5)
...@@ -236,7 +238,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
    """
    with tf.name_scope(name):
      with tf.name_scope('ResnetV1FPN'):
        # TODO(yiming): Add a batchnorm layer between two fc layers.
        feature_extractor_model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(units=1024, activation='relu'),
...@@ -283,12 +285,15 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor(
        fpn_min_level=fpn_min_level,
        fpn_max_level=fpn_max_level,
        additional_layer_depth=additional_layer_depth,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams
    )


class FasterRCNNResnet101FpnKerasFeatureExtractor(
    FasterRCNNResnetV1FpnKerasFeatureExtractor):
  """Faster RCNN with Resnet101 FPN feature extractor."""

  def __init__(self,
               is_training,
               first_stage_features_stride=16,
...@@ -323,7 +328,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor(
        fpn_min_level=fpn_min_level,
        fpn_max_level=fpn_max_level,
        additional_layer_depth=additional_layer_depth,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams)


class FasterRCNNResnet152FpnKerasFeatureExtractor(
...@@ -364,4 +370,5 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor(
        fpn_min_level=fpn_min_level,
        fpn_max_level=fpn_max_level,
        additional_layer_depth=additional_layer_depth,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams)
...@@ -21,8 +21,8 @@ from google.protobuf import text_format

from object_detection.builders import hyperparams_builder
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
...@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
        }
      }
    """
    text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _build_feature_extractor(self):
...
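The Merge-to-Parse switch above is a strictness fix: text_format.Merge lets a later assignment to a singular field silently overwrite an earlier one, while text_format.Parse rejects the duplicate. A minimal sketch of the difference, assuming the `activation` enum field with RELU_6/NONE values exists on Hyperparams as in the released proto:

from google.protobuf import text_format
from object_detection.protos import hyperparams_pb2

duplicated = 'activation: RELU_6 activation: NONE'  # singular field set twice
msg = hyperparams_pb2.Hyperparams()
text_format.Merge(duplicated, msg)  # silently keeps the last value (NONE)
try:
  text_format.Parse(duplicated, hyperparams_pb2.Hyperparams())
except text_format.ParseError:
  pass  # Parse surfaces the duplicate instead of masking a likely typo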
...@@ -21,6 +21,7 @@ from __future__ import print_function

import tensorflow.compat.v1 as tf

from tensorflow.python.keras.applications import resnet

from object_detection.core import freezable_batch_norm
from object_detection.models.keras_models import model_utils
...@@ -95,11 +96,11 @@ class _LayersOverride(object):
    self.regularizer = tf.keras.regularizers.l2(weight_decay)
    self.initializer = tf.variance_scaling_initializer()

  def _FixedPaddingLayer(self, kernel_size, rate=1):  # pylint: disable=invalid-name
    return tf.keras.layers.Lambda(
        lambda x: _fixed_padding(x, kernel_size, rate))

  def Conv2D(self, filters, kernel_size, **kwargs):  # pylint: disable=invalid-name
    """Builds a Conv2D layer according to the current Object Detection config.

    Overrides the Keras Resnet application's convolutions with ones that
...@@ -141,7 +142,7 @@ class _LayersOverride(object):
    else:
      return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs)

  def Activation(self, *args, **kwargs):  # pylint: disable=unused-argument,invalid-name
    """Builds an activation layer.

    Overrides the Keras application Activation layer specified by the
...@@ -163,7 +164,7 @@ class _LayersOverride(object):
    else:
      return tf.keras.layers.Lambda(tf.nn.relu, name=name)

  def BatchNormalization(self, **kwargs):  # pylint: disable=invalid-name
    """Builds a normalization layer.

    Overrides the Keras application batch norm with the norm specified by the
...@@ -191,7 +192,7 @@ class _LayersOverride(object):
        momentum=self._default_batchnorm_momentum,
        **kwargs)

  def Input(self, shape):  # pylint: disable=invalid-name
    """Builds an Input layer.

    Overrides the Keras application Input layer with one that uses a
...@@ -219,7 +220,7 @@ class _LayersOverride(object):
        input=input_tensor, shape=[None] + shape)
    return model_utils.input_layer(shape, placeholder_with_default)

  def MaxPooling2D(self, pool_size, **kwargs):  # pylint: disable=invalid-name
    """Builds a MaxPooling2D layer with default padding as 'SAME'.

    This is specified by the default resnet arg_scope in slim.
...@@ -237,7 +238,7 @@ class _LayersOverride(object):
  # Add alias as Keras also has it.
  MaxPool2D = MaxPooling2D  # pylint: disable=invalid-name

  def ZeroPadding2D(self, padding, **kwargs):  # pylint: disable=unused-argument,invalid-name
    """Replaces explicit padding in the Keras application with a no-op.

    Args:
...@@ -395,3 +396,146 @@ def resnet_v1_152(batchnorm_training,
  return tf.keras.applications.resnet.ResNet152(
      layers=layers_override, **kwargs)
# pylint: enable=invalid-name
# The following code is based on the existing Keras ResNet model pattern:
# google3/third_party/tensorflow/python/keras/applications/resnet.py


def block_basic(x,
                filters,
                kernel_size=3,
                stride=1,
                conv_shortcut=False,
                name=None):
  """A residual block for ResNet18/34.

  Args:
    x: input tensor.
    filters: integer, filters of the block's convolution layers.
    kernel_size: default 3, kernel size of the block's convolution layers.
    stride: default 1, stride of the block's second (strided) convolution.
    conv_shortcut: default False, use convolution shortcut if True, otherwise
      identity shortcut.
    name: string, block label.

  Returns:
    Output tensor for the residual block.
  """
  layers = tf.keras.layers
  bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1

  # Pre-activation: batch norm + ReLU run before the convolutions. The
  # projection shortcut branches off the pre-activated tensor; the identity
  # shortcut branches off the raw input.
  preact = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(x)
  preact = layers.Activation('relu', name=name + '_preact_relu')(preact)

  if conv_shortcut:
    shortcut = layers.Conv2D(
        filters, 1, strides=1, name=name + '_0_conv')(preact)
  else:
    shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x

  x = layers.ZeroPadding2D(
      padding=((1, 1), (1, 1)), name=name + '_1_pad')(preact)
  x = layers.Conv2D(
      filters, kernel_size, strides=1, use_bias=False,
      name=name + '_1_conv')(x)
  x = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x)
  x = layers.Activation('relu', name=name + '_1_relu')(x)

  x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
  x = layers.Conv2D(
      filters, kernel_size, strides=stride, use_bias=False,
      name=name + '_2_conv')(x)
  x = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x)
  x = layers.Activation('relu', name=name + '_2_relu')(x)

  x = layers.Add(name=name + '_out')([shortcut, x])
  return x
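As a quick plumbing check of block_basic (a sketch, assuming the module imports as below): with an identity shortcut and stride 2, the 1x1 max pool downsamples the shortcut so the final Add sees matching shapes:

import tensorflow.compat.v1 as tf
from object_detection.models.keras_models import resnet_v1

inputs = tf.keras.Input(shape=(32, 32, 8))
# stride=2 without conv_shortcut: the strided second conv halves H and W,
# and MaxPooling2D(1, strides=2) downsamples the identity shortcut to match.
outputs = resnet_v1.block_basic(inputs, filters=8, stride=2, name='demo')
print(tf.keras.Model(inputs, outputs).output_shape)  # (None, 16, 16, 8)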

def stack_basic(x, filters, blocks, stride1=2, name=None):
  """A set of stacked residual blocks for ResNet18/34.

  Args:
    x: input tensor.
    filters: integer, filters of the convolution layers in each block.
    blocks: integer, number of blocks in the stack.
    stride1: default 2, stride used to downsample in the last block.
    name: string, stack label.

  Returns:
    Output tensor for the stacked blocks.
  """
  x = block_basic(x, filters, conv_shortcut=True, name=name + '_block1')
  for i in range(2, blocks):
    x = block_basic(x, filters, name=name + '_block' + str(i))
  x = block_basic(
      x, filters, stride=stride1, name=name + '_block' + str(blocks))
  return x
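And at the stack level (same import assumption): for blocks=2 the calls are block1 with a projection shortcut at stride 1, then block2 at stride1=2, so each stack downsamples in its last block:

import tensorflow.compat.v1 as tf
from object_detection.models.keras_models import resnet_v1

inputs = tf.keras.Input(shape=(16, 16, 64))
outputs = resnet_v1.stack_basic(inputs, filters=128, blocks=2, name='conv3')
print(tf.keras.Model(inputs, outputs).output_shape)  # (None, 8, 8, 128)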

def resnet_v1_18(include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 classifier_activation='softmax'):
  """Instantiates the ResNet18 architecture."""

  def stack_fn(x):
    x = stack_basic(x, 64, 2, stride1=1, name='conv2')
    x = stack_basic(x, 128, 2, name='conv3')
    x = stack_basic(x, 256, 2, name='conv4')
    return stack_basic(x, 512, 2, name='conv5')

  return resnet.ResNet(
      stack_fn,
      True,  # preact
      True,  # use_bias
      'resnet18',
      include_top,
      weights,
      input_tensor,
      input_shape,
      pooling,
      classes,
      classifier_activation=classifier_activation)

def resnet_v1_34(include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 classifier_activation='softmax'):
  """Instantiates the ResNet34 architecture."""

  def stack_fn(x):
    x = stack_basic(x, 64, 3, stride1=1, name='conv2')
    x = stack_basic(x, 128, 4, name='conv3')
    x = stack_basic(x, 256, 6, name='conv4')
    return stack_basic(x, 512, 3, name='conv5')

  return resnet.ResNet(
      stack_fn,
      True,  # preact
      True,  # use_bias
      'resnet34',
      include_top,
      weights,
      input_tensor,
      input_shape,
      pooling,
      classes,
      classifier_activation=classifier_activation)
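As a sanity note on the naming: counting two 3x3 convolutions per basic block plus the stem convolution and the classifier head reproduces the nominal depths of the two variants:

# Depth bookkeeping for the stack configurations above.
assert 2 * (2 + 2 + 2 + 2) + 2 == 18  # resnet_v1_18: stacks of 2/2/2/2
assert 2 * (3 + 4 + 6 + 3) + 2 == 34  # resnet_v1_34: stacks of 3/4/6/3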
...@@ -20,12 +20,13 @@ object detection. To verify the consistency of the two models, we compare:
2. Number of global variables.
"""
import unittest

from absl.testing import parameterized
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf

from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_models import resnet_v1
from object_detection.protos import hyperparams_pb2
...@@ -180,5 +181,46 @@ class ResnetV1Test(test_case.TestCase):
    self.assertEqual(len(variables), var_num)


class ResnetShapeTest(test_case.TestCase, parameterized.TestCase):

  @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
  @parameterized.parameters(
      {
          'resnet_type': 'resnet_v1_34',
          'output_layer_names': [
              'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out',
              'conv5_block3_out'
          ]
      },
      {
          'resnet_type': 'resnet_v1_18',
          'output_layer_names': [
              'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out',
              'conv5_block2_out'
          ]
      })
  def test_output_shapes(self, resnet_type, output_layer_names):
    if resnet_type == 'resnet_v1_34':
      model = resnet_v1.resnet_v1_34(weights=None)
    else:
      model = resnet_v1.resnet_v1_18(weights=None)
    outputs = [
        model.get_layer(output_layer_name).output
        for output_layer_name in output_layer_names
    ]
    resnet_model = tf.keras.models.Model(inputs=model.input, outputs=outputs)
    outputs = resnet_model(np.zeros((2, 64, 64, 3), dtype=np.float32))

    # Check the conv2-level output (stride 4, 64 channels):
    self.assertEqual(outputs[0].shape, [2, 16, 16, 64])
    # Check the conv3-level output (stride 8, 128 channels):
    self.assertEqual(outputs[1].shape, [2, 8, 8, 128])
    # Check the conv4-level output (stride 16, 256 channels):
    self.assertEqual(outputs[2].shape, [2, 4, 4, 256])
    # Check the conv5-level output (stride 32, 512 channels):
    self.assertEqual(outputs[3].shape, [2, 2, 2, 512])


if __name__ == '__main__':
  tf.test.main()
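The asserted shapes simply follow the standard ResNet stride schedule (conv2 through conv5 at strides 4, 8, 16, 32) with the basic-block widths used above; a sketch of that arithmetic:

# Expected feature shapes for a 64x64 input, batch of 2.
batch, input_size = 2, 64
strides = [4, 8, 16, 32]      # conv2_out .. conv5_out
widths = [64, 128, 256, 512]  # basic-block filter counts for ResNet-18/34
expected = [(batch, input_size // s, input_size // s, w)
            for s, w in zip(strides, widths)]
assert expected == [(2, 16, 16, 64), (2, 8, 8, 128),
                    (2, 4, 4, 256), (2, 2, 2, 512)]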