"...text-generation-inference.git" did not exist on "46aeb0860dae0c5a1e5990dff50f8d381fddce61"
Unverified Commit 0ad4922f authored by vivek rathod's avatar vivek rathod Committed by GitHub
Browse files

Merged commit includes the following changes: (#8809)



320335495  by rathodv:

    Remove hparams support from TF1 main binaries as it's not available in the TF1.15 runtime on Cloud AI Platform.

--
320278161  by ronnyvotel:

    Exposing DensePose fields to model libraries.

--
320277319  by rathodv:

    Remove the TPU name check since the TPU is automatically inferred under Cloud AI Platform.

--
320258215  by rathodv:

    Internal Change.

--
320245458  by yuhuic:

    Updated the CenterNet restore_from_objects function to be compatible with
    existing configs that load converted checkpoints.

--
320225405  by jonathanhuang:

    Small change to Keras box predictor and box heads to fix export errors for SSD and Faster R-CNN.

--
320145077  by aom:

    Implements EfficientDet feature extractor.

--

PiperOrigin-RevId: 320335495
Co-authored-by: default avatarTF Object Detection Team <no-reply@google.com>
parent 571369aa
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
"""A function to build a DetectionModel from configuration.""" """A function to build a DetectionModel from configuration."""
import functools import functools
import sys
from object_detection.builders import anchor_generator_builder from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder from object_detection.builders import box_predictor_builder
...@@ -58,6 +59,8 @@ if tf_version.is_tf2(): ...@@ -58,6 +59,8 @@ if tf_version.is_tf2():
from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
from object_detection.predictors import rfcn_keras_box_predictor from object_detection.predictors import rfcn_keras_box_predictor
if sys.version_info[0] >= 3:
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn
if tf_version.is_tf1(): if tf_version.is_tf1():
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
...@@ -99,6 +102,22 @@ if tf_version.is_tf2(): ...@@ -99,6 +102,22 @@ if tf_version.is_tf2():
ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor, ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor,
'ssd_resnet152_v1_fpn_keras': 'ssd_resnet152_v1_fpn_keras':
ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor, ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor,
'ssd_efficientnet-b0_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB0BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b1_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB1BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b2_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB2BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b3_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB3BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b4_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB4BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b5_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB5BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b6_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB6BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b7_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB7BiFPNKerasFeatureExtractor,
} }
FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = { FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
...@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config, ...@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config,
feature_extractor_config.fpn.additional_layer_depth, feature_extractor_config.fpn.additional_layer_depth,
}) })
if feature_extractor_config.HasField('bifpn'):
kwargs.update({
'bifpn_min_level': feature_extractor_config.bifpn.min_level,
'bifpn_max_level': feature_extractor_config.bifpn.max_level,
'bifpn_num_iterations': feature_extractor_config.bifpn.num_iterations,
'bifpn_num_filters': feature_extractor_config.bifpn.num_filters,
'bifpn_combine_method': feature_extractor_config.bifpn.combine_method,
})
return feature_extractor_class(**kwargs) return feature_extractor_class(**kwargs)
......
...@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase): ...@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
def ssd_feature_extractors(self): def ssd_feature_extractors(self):
raise NotImplementedError raise NotImplementedError
  def get_override_base_feature_extractor_hyperparams(self, extractor_type):
    """Whether `extractor_type` needs override_base_feature_extractor_hyperparams.

    Abstract hook; TF1/TF2 subclasses return True for the feature extractor
    types whose configs must set the override flag to build successfully.

    Args:
      extractor_type: String key of an SSD feature extractor class map entry.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError
def faster_rcnn_feature_extractors(self): def faster_rcnn_feature_extractors(self):
raise NotImplementedError raise NotImplementedError
...@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase): ...@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
} }
} }
} }
override_base_feature_extractor_hyperparams: true
} }
box_coder { box_coder {
faster_rcnn_box_coder { faster_rcnn_box_coder {
...@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase): ...@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
for extractor_type, extractor_class in self.ssd_feature_extractors().items( for extractor_type, extractor_class in self.ssd_feature_extractors().items(
): ):
model_proto.ssd.feature_extractor.type = extractor_type model_proto.ssd.feature_extractor.type = extractor_type
model_proto.ssd.feature_extractor.override_base_feature_extractor_hyperparams = (
self.get_override_base_feature_extractor_hyperparams(extractor_type))
model = model_builder.build(model_proto, is_training=True) model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch) self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class) self.assertIsInstance(model._feature_extractor, extractor_class)
......
...@@ -38,6 +38,9 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest): ...@@ -38,6 +38,9 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest):
def ssd_feature_extractors(self): def ssd_feature_extractors(self):
return model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
return extractor_type in {'ssd_inception_v2', 'ssd_inception_v3'}
def faster_rcnn_feature_extractors(self): def faster_rcnn_feature_extractors(self):
return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
......
...@@ -42,6 +42,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): ...@@ -42,6 +42,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
def ssd_feature_extractors(self): def ssd_feature_extractors(self):
return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
return extractor_type in {}
def faster_rcnn_feature_extractors(self): def faster_rcnn_feature_extractors(self):
return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
......
...@@ -134,7 +134,7 @@ class BoxPredictor(object): ...@@ -134,7 +134,7 @@ class BoxPredictor(object):
pass pass
class KerasBoxPredictor(tf.keras.Model): class KerasBoxPredictor(tf.keras.layers.Layer):
"""Keras-based BoxPredictor.""" """Keras-based BoxPredictor."""
def __init__(self, is_training, num_classes, freeze_batchnorm, def __init__(self, is_training, num_classes, freeze_batchnorm,
......
...@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): ...@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections] detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be (If a model is producing class-agnostic detections, this field may be
missing) missing)
instance_masks: [batch, max_detections, image_height, image_width] detection_masks: [batch, max_detections, mask_height, mask_width]
(optional) (optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional) detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch] num_detections: [batch]
In addition to the above fields this stage also outputs the following In addition to the above fields this stage also outputs the following
......
...@@ -2749,6 +2749,14 @@ class CenterNetMetaArch(model.DetectionModel): ...@@ -2749,6 +2749,14 @@ class CenterNetMetaArch(model.DetectionModel):
checkpoint (with compatible variable names) or to restore from a checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training. classification checkpoint for initialization prior to training.
Valid values: `detection`, `classification`. Default 'detection'. Valid values: `detection`, `classification`. Default 'detection'.
'detection': used when loading in the Hourglass model pre-trained on
other detection task.
'classification': used when loading in the ResNet model pre-trained on
image classification task. Note that only the image feature encoding
part is loaded but not those upsampling layers.
'fine_tune': used when loading the entire CenterNet feature extractor
pre-trained on other tasks. The checkpoints saved during CenterNet
model training can be directly loaded using this mode.
Returns: Returns:
A dict mapping keys to Trackable objects (tf.Module or Checkpoint). A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
...@@ -2757,10 +2765,13 @@ class CenterNetMetaArch(model.DetectionModel): ...@@ -2757,10 +2765,13 @@ class CenterNetMetaArch(model.DetectionModel):
if fine_tune_checkpoint_type == 'classification': if fine_tune_checkpoint_type == 'classification':
return {'feature_extractor': self._feature_extractor.get_base_model()} return {'feature_extractor': self._feature_extractor.get_base_model()}
if fine_tune_checkpoint_type == 'detection': elif fine_tune_checkpoint_type == 'detection':
fake_model = tf.train.Checkpoint( return {'feature_extractor': self._feature_extractor.get_model()}
elif fine_tune_checkpoint_type == 'fine_tune':
feature_extractor_model = tf.train.Checkpoint(
_feature_extractor=self._feature_extractor) _feature_extractor=self._feature_extractor)
return {'model': fake_model} return {'model': feature_extractor_model}
else: else:
raise ValueError('Not supported fine tune checkpoint type - {}'.format( raise ValueError('Not supported fine tune checkpoint type - {}'.format(
......
...@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
return eval_metric_ops return eval_metric_ops
def _check_mask_type_and_value(array_name, masks): def convert_masks_to_binary(masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1.""" """Converts masks to 0 or 1 and uint8 type."""
if masks.dtype != np.uint8: return (masks > 0).astype(np.uint8)
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError('{} elements can only be either 0 or 1.'.format(
array_name))
class CocoKeypointEvaluator(CocoDetectionEvaluator): class CocoKeypointEvaluator(CocoDetectionEvaluator):
...@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks = groundtruth_dict[ groundtruth_instance_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks] standard_fields.InputDataFields.groundtruth_instance_masks]
_check_mask_type_and_value(standard_fields.InputDataFields. groundtruth_instance_masks = convert_masks_to_binary(
groundtruth_instance_masks, groundtruth_instance_masks)
groundtruth_instance_masks)
self._groundtruth_list.extend( self._groundtruth_list.extend(
coco_tools. coco_tools.
ExportSingleImageGroundtruthToCoco( ExportSingleImageGroundtruthToCoco(
...@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'are incompatible: {} vs {}'.format( 'are incompatible: {} vs {}'.format(
groundtruth_masks_shape, groundtruth_masks_shape,
detection_masks.shape)) detection_masks.shape))
_check_mask_type_and_value(standard_fields.DetectionResultFields. detection_masks = convert_masks_to_binary(detection_masks)
detection_masks,
detection_masks)
self._detection_masks_list.extend( self._detection_masks_list.extend(
coco_tools.ExportSingleImageDetectionMasksToCoco( coco_tools.ExportSingleImageDetectionMasksToCoco(
image_id=image_id, image_id=image_id,
......
...@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase): ...@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
image_id='image3', image_id='image3',
detections_dict={ detections_dict={
standard_fields.DetectionResultFields.detection_boxes: standard_fields.DetectionResultFields.detection_boxes:
np.array([[25., 25., 50., 50.]]), np.array([[25., 25., 50., 50.]]),
standard_fields.DetectionResultFields.detection_scores: standard_fields.DetectionResultFields.detection_scores:
np.array([.8]), np.array([.8]),
standard_fields.DetectionResultFields.detection_classes: standard_fields.DetectionResultFields.detection_classes:
np.array([1]), np.array([1]),
standard_fields.DetectionResultFields.detection_masks: standard_fields.DetectionResultFields.detection_masks:
np.pad(np.ones([1, 25, 25], dtype=np.uint8), # The value of 5 is equivalent to 1, since masks will be
((0, 0), (10, 10), (10, 10)), mode='constant') # thresholded and binarized before evaluation.
np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
}) })
metrics = coco_evaluator.evaluate() metrics = coco_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
......
...@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils ...@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
# pylint: disable=g-import-not-at-top # pylint: disable=g-import-not-at-top
try: try:
from tensorflow.contrib import learn as contrib_learn from tensorflow.contrib import learn as contrib_learn
from tensorflow.contrib import tpu as contrib_tpu
except ImportError: except ImportError:
# TF 2.0 doesn't ship with contrib. # TF 2.0 doesn't ship with contrib.
pass pass
...@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, ...@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
of groundtruth boxes per image.. of groundtruth boxes per image..
'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32 'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
tensor of keypoints (if provided in groundtruth). tensor of keypoints (if provided in groundtruth).
'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
with the number of DensePose points for each instance (if provided in
groundtruth).
'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
max_sampled_points] int32 tensor with the part ids for each DensePose
sampled point (if provided in groundtruth).
'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
max_sampled_points, 4] containing the DensePose surface coordinates for
each sampled point (if provided in groundtruth).
'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating 'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
group_of annotations (if provided in groundtruth). group_of annotations (if provided in groundtruth).
'groundtruth_labeled_classes': [batch_size, num_classes] int64 'groundtruth_labeled_classes': [batch_size, num_classes] int64
...@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, ...@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack( groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes) labeled_classes)
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_num_points):
groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_num_points))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_part_ids):
groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_part_ids))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_surface_coords):
groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords))
groundtruth[input_data_fields.num_groundtruth_boxes] = ( groundtruth[input_data_fields.num_groundtruth_boxes] = (
tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]])) tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
return groundtruth return groundtruth
...@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): ...@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_keypoints, fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities, fields.InputDataFields.groundtruth_keypoint_visibilities,
fields.InputDataFields.groundtruth_dp_num_points,
fields.InputDataFields.groundtruth_dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords,
fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_is_crowd, fields.InputDataFields.groundtruth_is_crowd,
...@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels): ...@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
if fields.InputDataFields.groundtruth_keypoint_visibilities in labels: if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
gt_keypoint_visibilities_list = labels[ gt_keypoint_visibilities_list = labels[
fields.InputDataFields.groundtruth_keypoint_visibilities] fields.InputDataFields.groundtruth_keypoint_visibilities]
gt_dp_num_points_list = None
if fields.InputDataFields.groundtruth_dp_num_points in labels:
gt_dp_num_points_list = labels[
fields.InputDataFields.groundtruth_dp_num_points]
gt_dp_part_ids_list = None
if fields.InputDataFields.groundtruth_dp_part_ids in labels:
gt_dp_part_ids_list = labels[
fields.InputDataFields.groundtruth_dp_part_ids]
gt_dp_surface_coords_list = None
if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
gt_dp_surface_coords_list = labels[
fields.InputDataFields.groundtruth_dp_surface_coords]
gt_weights_list = None gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels: if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
...@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels): ...@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
groundtruth_masks_list=gt_masks_list, groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list, groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list, groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
groundtruth_dp_num_points_list=gt_dp_num_points_list,
groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
groundtruth_weights_list=gt_weights_list, groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list, groundtruth_is_crowd_list=gt_is_crowd_list,
groundtruth_group_of_list=gt_group_of_list, groundtruth_group_of_list=gt_group_of_list,
groundtruth_area_list=gt_area_list) groundtruth_area_list=gt_area_list)
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
postprocess_on_cpu=False): postprocess_on_cpu=False):
"""Creates a model function for `Estimator`. """Creates a model function for `Estimator`.
...@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
side_inputs = detection_model.get_side_inputs(features) side_inputs = detection_model.get_side_inputs(features)
if use_tpu and train_config.use_bfloat16: if use_tpu and train_config.use_bfloat16:
with contrib_tpu.bfloat16_scope(): with tf.tpu.bfloat16_scope():
prediction_dict = detection_model.predict( prediction_dict = detection_model.predict(
preprocessed_images, preprocessed_images,
features[fields.InputDataFields.true_image_shape], **side_inputs) features[fields.InputDataFields.true_image_shape], **side_inputs)
...@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
if use_tpu and postprocess_on_cpu: if use_tpu and postprocess_on_cpu:
detections = contrib_tpu.outside_compilation( detections = tf.tpu.outside_compilation(
postprocess_wrapper, postprocess_wrapper,
(prediction_dict, (prediction_dict,
features[fields.InputDataFields.true_image_shape])) features[fields.InputDataFields.true_image_shape]))
...@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode == tf.estimator.ModeKeys.TRAIN: if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu: if use_tpu:
training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer) training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero. # Optionally freeze some layers by setting their gradients to be zero.
trainable_variables = None trainable_variables = None
...@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
# EVAL executes on CPU, so use regular non-TPU EstimatorSpec. # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
if use_tpu and mode != tf.estimator.ModeKeys.EVAL: if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
return contrib_tpu.TPUEstimatorSpec( return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode, mode=mode,
scaffold_fn=scaffold_fn, scaffold_fn=scaffold_fn,
predictions=detections, predictions=detections,
...@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False, ...@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_estimator_and_inputs(run_config, def create_estimator_and_inputs(run_config,
hparams, hparams=None,
pipeline_config_path, pipeline_config_path=None,
config_override=None, config_override=None,
train_steps=None, train_steps=None,
sample_1_of_n_eval_examples=1, sample_1_of_n_eval_examples=1,
...@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config, ...@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
Args: Args:
run_config: A `RunConfig`. run_config: A `RunConfig`.
hparams: A `HParams`. hparams: (optional) A `HParams`.
pipeline_config_path: A path to a pipeline config file. pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`. override the config from `pipeline_config_path`.
...@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config, ...@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
model_config=model_config, predict_input_config=eval_input_configs[0]) model_config=model_config, predict_input_config=eval_input_configs[0])
# Read export_to_tpu from hparams if not passed. # Read export_to_tpu from hparams if not passed.
if export_to_tpu is None: if export_to_tpu is None and hparams is not None:
export_to_tpu = hparams.get('export_to_tpu', False) export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s', tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu) use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu, model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
postprocess_on_cpu) postprocess_on_cpu)
if use_tpu_estimator: if use_tpu_estimator:
estimator = contrib_tpu.TPUEstimator( estimator = tf.estimator.tpu.TPUEstimator(
model_fn=model_fn, model_fn=model_fn,
train_batch_size=train_config.batch_size, train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval. # For each core, only batch size 1 is supported for eval.
......
...@@ -93,6 +93,12 @@ def _compute_losses_and_predictions_dicts( ...@@ -93,6 +93,12 @@ def _compute_losses_and_predictions_dicts(
instance masks for objects. instance masks for objects.
labels[fields.InputDataFields.groundtruth_keypoints] is a labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box. float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
tensor with the number of sampled DensePose points per object.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
tensor with the DensePose part ids (0-indexed) per object.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
float32 tensor with the DensePose surface coordinates.
labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
containing group_of annotations. containing group_of annotations.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32 labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
...@@ -195,6 +201,17 @@ def eager_train_step(detection_model, ...@@ -195,6 +201,17 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_keypoints] is a labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box. keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is a
[batch_size, num_boxes] int32 tensor with the number of DensePose
sampled points per instance.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
[batch_size, num_boxes, max_sampled_points] int32 tensor with the
part ids (0-indexed) for each instance.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
[batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
surface coordinates for each point. Each surface coordinate is of the
form (y, x, v, u) where (y, x) are normalized image locations and
(v, u) are part-relative normalized surface coordinates.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32 labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
k-hot tensor of classes. k-hot tensor of classes.
unpad_groundtruth_tensors: A parameter passed to unstack_batch. unpad_groundtruth_tensors: A parameter passed to unstack_batch.
......
...@@ -22,7 +22,6 @@ from absl import flags ...@@ -22,7 +22,6 @@ from absl import flags
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib from object_detection import model_lib
flags.DEFINE_string( flags.DEFINE_string(
...@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample ' ...@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, ' 'one of every n train input examples for evaluation, '
'where n is provided. This is only used if ' 'where n is provided. This is only used if '
'`eval_training_data` is True.') '`eval_training_data` is True.')
flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated '
'hparam_name=value pairs.')
flags.DEFINE_string( flags.DEFINE_string(
'checkpoint_dir', None, 'Path to directory holding a checkpoint. If ' 'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
'`checkpoint_dir` is provided, this binary operates in eval-only mode, ' '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
...@@ -68,7 +63,6 @@ def main(unused_argv): ...@@ -68,7 +63,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config, run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path, pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps, train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples, sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
...@@ -83,9 +83,8 @@ def main(unused_argv): ...@@ -83,9 +83,8 @@ def main(unused_argv):
wait_interval=300, timeout=FLAGS.eval_timeout) wait_interval=300, timeout=FLAGS.eval_timeout)
else: else:
if FLAGS.use_tpu: if FLAGS.use_tpu:
if FLAGS.tpu_name is None: # TPU is automatically inferred if tpu_name is None and
raise ValueError('--tpu_name needs to be specified when use_tpu' # we are running under cloud ai-platform.
' is set.')
resolver = tf.distribute.cluster_resolver.TPUClusterResolver( resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
FLAGS.tpu_name) FLAGS.tpu_name)
tf.config.experimental_connect_to_cluster(resolver) tf.config.experimental_connect_to_cluster(resolver)
......
...@@ -26,18 +26,8 @@ from absl import flags ...@@ -26,18 +26,8 @@ from absl import flags
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib from object_detection import model_lib
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs') tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')
# Cloud TPU Cluster Resolvers # Cloud TPU Cluster Resolvers
...@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train', ...@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If ' flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
'this is not provided, batch size is read from training ' 'this is not provided, batch size is read from training '
'config.') 'config.')
flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.') flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False, flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.') 'If training data should be evaluated for this job.')
...@@ -99,15 +85,15 @@ def main(unused_argv): ...@@ -99,15 +85,15 @@ def main(unused_argv):
flags.mark_flag_as_required('pipeline_config_path') flags.mark_flag_as_required('pipeline_config_path')
tpu_cluster_resolver = ( tpu_cluster_resolver = (
contrib_cluster_resolver.TPUClusterResolver( tf.distribute.cluster_resolver.TPUClusterResolver(
tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)) tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master() tpu_grpc_url = tpu_cluster_resolver.get_master()
config = contrib_tpu.RunConfig( config = tf.estimator.tpu.RunConfig(
master=tpu_grpc_url, master=tpu_grpc_url,
evaluation_master=tpu_grpc_url, evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir, model_dir=FLAGS.model_dir,
tpu_config=contrib_tpu.TPUConfig( tpu_config=tf.estimator.tpu.TPUConfig(
iterations_per_loop=FLAGS.iterations_per_loop, iterations_per_loop=FLAGS.iterations_per_loop,
num_shards=FLAGS.num_shards)) num_shards=FLAGS.num_shards))
...@@ -117,7 +103,6 @@ def main(unused_argv): ...@@ -117,7 +103,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs( train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config, run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path, pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps, train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples, sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the ssd_efficientnet_bifpn_feature_extractor."""
import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v2 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
def _count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([
tf.keras.backend.count_params(p) for p in model.trainable_weights]))
# One parameter dict per EfficientDet scale (d0-d7). Each pairs an
# EfficientNet backbone with the BiFPN depth (num_iterations), width
# (num_filters), and node combine method used at that scale.
@parameterized.parameters(
    {'efficientdet_version': 'efficientdet-d0',
     'efficientnet_version': 'efficientnet-b0',
     'bifpn_num_iterations': 3,
     'bifpn_num_filters': 64,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d1',
     'efficientnet_version': 'efficientnet-b1',
     'bifpn_num_iterations': 4,
     'bifpn_num_filters': 88,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d2',
     'efficientnet_version': 'efficientnet-b2',
     'bifpn_num_iterations': 5,
     'bifpn_num_filters': 112,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d3',
     'efficientnet_version': 'efficientnet-b3',
     'bifpn_num_iterations': 6,
     'bifpn_num_filters': 160,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d4',
     'efficientnet_version': 'efficientnet-b4',
     'bifpn_num_iterations': 7,
     'bifpn_num_filters': 224,
     'bifpn_combine_method': 'fast_attention'},
    {'efficientdet_version': 'efficientdet-d5',
     'efficientnet_version': 'efficientnet-b5',
     'bifpn_num_iterations': 7,
     'bifpn_num_filters': 288,
     'bifpn_combine_method': 'fast_attention'},
    # efficientdet-d6 and efficientdet-d7 only differ in input size.
    {'efficientdet_version': 'efficientdet-d6-d7',
     'efficientnet_version': 'efficientnet-b6',
     'bifpn_num_iterations': 8,
     'bifpn_num_filters': 384,
     'bifpn_combine_method': 'sum'})
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class SSDEfficientNetBiFPNFeatureExtractorTest(
    test_case.TestCase, parameterized.TestCase):
  """Shape and parameter-count tests for the EfficientNet-BiFPN extractor."""

  def _build_conv_hyperparams(self, add_batch_norm=True):
    """Builds KerasLayerHyperparams from an inline hyperparams text proto.

    Args:
      add_batch_norm: Whether to append a batch_norm block to the proto
        before parsing it.

    Returns:
      A `hyperparams_builder.KerasLayerHyperparams` built from the proto.
    """
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      force_use_bias: true
      activation: SWISH
      regularizer {
        l2_regularizer {
          weight: 0.0004
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.03
          mean: 0.0
        }
      }
    """
    if add_batch_norm:
      batch_norm_proto = """
        batch_norm {
          scale: true,
          decay: 0.99,
          epsilon: 0.001,
        }
      """
      conv_hyperparams_text_proto += batch_norm_proto
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _create_feature_extractor(self,
                                efficientnet_version='efficientnet-b0',
                                bifpn_num_iterations=3,
                                bifpn_num_filters=64,
                                bifpn_combine_method='fast_attention'):
    """Constructs a new EfficientNetBiFPN feature extractor."""
    depth_multiplier = 1.0
    pad_to_multiple = 1
    min_depth = 16
    # Pyramid levels are fixed to 3..7 for all scales; only the backbone
    # version and the BiFPN depth/width/combine method vary per test case.
    return (ssd_efficientnet_bifpn_feature_extractor
            .SSDEfficientNetBiFPNKerasFeatureExtractor(
                is_training=True,
                depth_multiplier=depth_multiplier,
                min_depth=min_depth,
                pad_to_multiple=pad_to_multiple,
                conv_hyperparams=self._build_conv_hyperparams(),
                freeze_batchnorm=False,
                inplace_batchnorm_update=False,
                bifpn_min_level=3,
                bifpn_max_level=7,
                bifpn_num_iterations=bifpn_num_iterations,
                bifpn_num_filters=bifpn_num_filters,
                bifpn_combine_method=bifpn_combine_method,
                efficientnet_version=efficientnet_version))

  # NOTE: `parameterized.parameters` passes every key of each dict, so the
  # test methods must accept `efficientdet_version` even when unused.
  def test_efficientdet_feature_extractor_shapes(self,
                                                 efficientdet_version,
                                                 efficientnet_version,
                                                 bifpn_num_iterations,
                                                 bifpn_num_filters,
                                                 bifpn_combine_method):
    """Checks output feature-map shapes for a batch of 256x256 inputs."""
    feature_extractor = self._create_feature_extractor(
        efficientnet_version=efficientnet_version,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method)
    outputs = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
    # Levels 3..7: spatial extent halves per level (256 / 2**level) and every
    # level carries `bifpn_num_filters` channels.
    self.assertEqual(outputs[0].shape, (2, 32, 32, bifpn_num_filters))
    self.assertEqual(outputs[1].shape, (2, 16, 16, bifpn_num_filters))
    self.assertEqual(outputs[2].shape, (2, 8, 8, bifpn_num_filters))
    self.assertEqual(outputs[3].shape, (2, 4, 4, bifpn_num_filters))
    self.assertEqual(outputs[4].shape, (2, 2, 2, bifpn_num_filters))

  def test_efficientdet_feature_extractor_params(self,
                                                 efficientdet_version,
                                                 efficientnet_version,
                                                 bifpn_num_iterations,
                                                 bifpn_num_filters,
                                                 bifpn_combine_method):
    """Checks the trainable parameter count for each EfficientDet scale."""
    feature_extractor = self._create_feature_extractor(
        efficientnet_version=efficientnet_version,
        bifpn_num_iterations=bifpn_num_iterations,
        bifpn_num_filters=bifpn_num_filters,
        bifpn_combine_method=bifpn_combine_method)
    # Call once so the Keras layers are built and their weights created.
    _ = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
    # Golden trainable-parameter counts per scale; a mismatch indicates an
    # unintended architecture change.
    expected_params = {
        'efficientdet-d0': 5484829,
        'efficientdet-d1': 8185156,
        'efficientdet-d2': 9818153,
        'efficientdet-d3': 13792706,
        'efficientdet-d4': 22691445,
        'efficientdet-d5': 35795677,
        'efficientdet-d6-d7': 53624512,
    }
    num_params = _count_params(feature_extractor)
    self.assertEqual(expected_params[efficientdet_version], num_params)
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
...@@ -61,7 +61,7 @@ class Head(object): ...@@ -61,7 +61,7 @@ class Head(object):
pass pass
class KerasHead(tf.keras.Model): class KerasHead(tf.keras.layers.Layer):
"""Keras head base class.""" """Keras head base class."""
def call(self, features): def call(self, features):
......
...@@ -145,7 +145,7 @@ message Ssd { ...@@ -145,7 +145,7 @@ message Ssd {
optional MaskHead mask_head_config = 25; optional MaskHead mask_head_config = 25;
} }
// Next id: 19. // Next id: 20.
message SsdFeatureExtractor { message SsdFeatureExtractor {
reserved 6; reserved 6;
...@@ -185,8 +185,13 @@ message SsdFeatureExtractor { ...@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
// feature maps added by SSD. // feature maps added by SSD.
optional bool use_depthwise = 8 [default = false]; optional bool use_depthwise = 8 [default = false];
// Feature Pyramid Networks config. oneof feature_pyramid_oneof {
optional FeaturePyramidNetworks fpn = 10; // Feature Pyramid Networks config.
FeaturePyramidNetworks fpn = 10;
// Bidirectional Feature Pyramid Networks config.
BidirectionalFeaturePyramidNetworks bifpn = 19;
}
// If true, replace preprocess function of feature extractor with a // If true, replace preprocess function of feature extractor with a
// placeholder. This should only be used if all the image preprocessing steps // placeholder. This should only be used if all the image preprocessing steps
...@@ -225,3 +230,23 @@ message FeaturePyramidNetworks { ...@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {
} }
// Configuration for Bidirectional Feature Pyramid Networks (BiFPN), as used
// by BiFPN-based SSD feature extractors (e.g. EfficientDet).
message BidirectionalFeaturePyramidNetworks {
  // Minimum level in the feature pyramid.
  optional int32 min_level = 1 [default = 3];

  // Maximum level in the feature pyramid.
  optional int32 max_level = 2 [default = 7];

  // The number of repeated top-down bottom-up iterations for BiFPN-based
  // feature extractors (bidirectional feature pyramid networks).
  optional int32 num_iterations = 3;

  // The number of filters (channels) to use in feature pyramid layers for
  // BiFPN-based feature extractors (bidirectional feature pyramid networks).
  optional int32 num_filters = 4;

  // Method used to combine inputs to BiFPN nodes; values exercised elsewhere
  // in the codebase include 'fast_attention' and 'sum'.
  optional string combine_method = 5 [default = 'fast_attention'];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment