Merged commit includes the following changes: (#8809)

320335495 by rathodv: Remove hparams support from TF1 main binaries as it's not available in TF1.15 runtime on cloud ai platform. -- 320278161 by ronnyvotel: Exposing DensePose fields to model libraries. -- 320277319 by rathodv: Remove TPU Name check since TPU is automatically inferred under cloud AI platform. -- 320258215 by rathodv: Internal Change. -- 320245458 by yuhuic: Updated the CenterNet restore_from_objects function to be compatible with existing configs that load converted checkpoints. -- 320225405 by jonathanhuang: Small change to Keras box predictor and box heads to fix export errors for SSD and Faster R-CNN. -- 320145077 by aom: Implements EfficientDet feature extractor. -- PiperOrigin-RevId: 320335495 Co-authored-by: TF Object Detection Team <no-reply@google.com>

Merged commit includes the following changes: (#8809)
320335495 by rathodv: Remove hparams support from TF1 main binaries as it's not available in TF1.15 runtime on cloud ai platform. -- 320278161 by ronnyvotel: Exposing DensePose fields to model libraries. -- 320277319 by rathodv: Remove TPU Name check since TPU is automatically inferred under cloud AI platform. -- 320258215 by rathodv: Internal Change. -- 320245458 by yuhuic: Updated the CenterNet restore_from_objects function to be compatible with existing configs that load converted checkpoints. -- 320225405 by jonathanhuang: Small change to Keras box predictor and box heads to fix export errors for SSD and Faster R-CNN. -- 320145077 by aom: Implements EfficientDet feature extractor. -- PiperOrigin-RevId: 320335495 Co-authored-by: TF Object Detection Team <no-reply@google.com>
0ad4922f · vivek rathod · GitHub · 571369aa · 0ad4922f · 0ad4922f
Unverified Commit 0ad4922f authored Jul 08, 2020 by vivek rathod Committed by GitHub Jul 08, 2020
18 changed files
--- a/research/object_detection/builders/model_builder.py
+++ b/research/object_detection/builders/model_builder.py
@@ -16,6 +16,7 @@
 """A function to build a DetectionModel from configuration."""

 import functools
+import sys
 from object_detection.builders import anchor_generator_builder
 from object_detection.builders import box_coder_builder
 from object_detection.builders import box_predictor_builder
@@ -58,6 +59,8 @@ if tf_version.is_tf2():
  from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
  from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
  from object_detection.predictors import rfcn_keras_box_predictor
+  if sys.version_info[0] >= 3:
+    from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn

 if tf_version.is_tf1():
  from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
@@ -99,6 +102,22 @@ if tf_version.is_tf2():
          ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor,
      'ssd_resnet152_v1_fpn_keras':
          ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor,
+      'ssd_efficientnet-b0_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB0BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b1_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB1BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b2_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB2BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b3_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB3BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b4_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB4BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b5_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB5BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b6_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB6BiFPNKerasFeatureExtractor,
+      'ssd_efficientnet-b7_bifpn_keras':
+          ssd_efficientnet_bifpn.SSDEfficientNetB7BiFPNKerasFeatureExtractor,
  }

  FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config,
            feature_extractor_config.fpn.additional_layer_depth,
    })

+  if feature_extractor_config.HasField('bifpn'):
+    kwargs.update({
+        'bifpn_min_level': feature_extractor_config.bifpn.min_level,
+        'bifpn_max_level': feature_extractor_config.bifpn.max_level,
+        'bifpn_num_iterations': feature_extractor_config.bifpn.num_iterations,
+        'bifpn_num_filters': feature_extractor_config.bifpn.num_filters,
+        'bifpn_combine_method': feature_extractor_config.bifpn.combine_method,
+    })

  return feature_extractor_class(**kwargs)


--- a/research/object_detection/builders/model_builder_test.py
+++ b/research/object_detection/builders/model_builder_test.py
@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
  def ssd_feature_extractors(self):
    raise NotImplementedError

+  def get_override_base_feature_extractor_hyperparams(self, extractor_type):
+    raise NotImplementedError
+
  def faster_rcnn_feature_extractors(self):
    raise NotImplementedError

@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
                }
              }
          }
-          override_base_feature_extractor_hyperparams: true
        }
        box_coder {
          faster_rcnn_box_coder {
@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
    for extractor_type, extractor_class in self.ssd_feature_extractors().items(
    ):
      model_proto.ssd.feature_extractor.type = extractor_type
+      model_proto.ssd.feature_extractor.override_base_feature_extractor_hyperparams = (
+          self.get_override_base_feature_extractor_hyperparams(extractor_type))
      model = model_builder.build(model_proto, is_training=True)
      self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
      self.assertIsInstance(model._feature_extractor, extractor_class)

--- a/research/object_detection/builders/model_builder_tf1_test.py
+++ b/research/object_detection/builders/model_builder_tf1_test.py
@@ -38,6 +38,9 @@ class ModelBuilderTF1Test(model_builder_test.ModelBuilderTest):
  def ssd_feature_extractors(self):
    return model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP

+  def get_override_base_feature_extractor_hyperparams(self, extractor_type):
+    return extractor_type in {'ssd_inception_v2', 'ssd_inception_v3'}
+
  def faster_rcnn_feature_extractors(self):
    return model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP


--- a/research/object_detection/builders/model_builder_tf2_test.py
+++ b/research/object_detection/builders/model_builder_tf2_test.py
@@ -42,6 +42,9 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest):
  def ssd_feature_extractors(self):
    return model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP

+  def get_override_base_feature_extractor_hyperparams(self, extractor_type):
+    return extractor_type in {}
+
  def faster_rcnn_feature_extractors(self):
    return model_builder.FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP


--- a/research/object_detection/core/box_predictor.py
+++ b/research/object_detection/core/box_predictor.py
@@ -134,7 +134,7 @@ class BoxPredictor(object):
    pass


-class KerasBoxPredictor(tf.keras.Model):
+class KerasBoxPredictor(tf.keras.layers.Layer):
  """Keras-based BoxPredictor."""

  def __init__(self, is_training, num_classes, freeze_batchnorm,

--- a/research/object_detection/core/model.py
+++ b/research/object_detection/core/model.py
@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
        detection_classes: [batch, max_detections]
          (If a model is producing class-agnostic detections, this field may be
          missing)
-        instance_masks: [batch, max_detections, image_height, image_width]
+        detection_masks: [batch, max_detections, mask_height, mask_width]
          (optional)
-        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+        detection_keypoints: [batch, max_detections, num_keypoints, 2]
+          (optional)
+        detection_keypoint_scores: [batch, max_detections, num_keypoints]
+          (optional)
+        detection_surface_coords: [batch, max_detections, mask_height,
+          mask_width, 2] (optional)
        num_detections: [batch]

        In addition to the above fields this stage also outputs the following

--- a/research/object_detection/meta_architectures/center_net_meta_arch.py
+++ b/research/object_detection/meta_architectures/center_net_meta_arch.py
@@ -2749,6 +2749,14 @@ class CenterNetMetaArch(model.DetectionModel):
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
        Valid values: `detection`, `classification`. Default 'detection'.
+        'detection': used when loading in the Hourglass model pre-trained on
+          other detection task.
+        'classification': used when loading in the ResNet model pre-trained on
+          image classification task. Note that only the image feature encoding
+          part is loaded but not those upsampling layers.
+        'fine_tune': used when loading the entire CenterNet feature extractor
+          pre-trained on other tasks. The checkpoints saved during CenterNet
+          model training can be directly loaded using this mode.

    Returns:
      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
@@ -2757,10 +2765,13 @@ class CenterNetMetaArch(model.DetectionModel):
    if fine_tune_checkpoint_type == 'classification':
      return {'feature_extractor': self._feature_extractor.get_base_model()}

-    if fine_tune_checkpoint_type == 'detection':
-      fake_model = tf.train.Checkpoint(
+    elif fine_tune_checkpoint_type == 'detection':
+      return {'feature_extractor': self._feature_extractor.get_model()}
+
+    elif fine_tune_checkpoint_type == 'fine_tune':
+      feature_extractor_model = tf.train.Checkpoint(
          _feature_extractor=self._feature_extractor)
-      return {'model': fake_model}
+      return {'model': feature_extractor_model}

    else:
      raise ValueError('Not supported  fine tune checkpoint type - {}'.format(

--- a/research/object_detection/metrics/coco_evaluation.py
+++ b/research/object_detection/metrics/coco_evaluation.py
@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
    return eval_metric_ops


-def _check_mask_type_and_value(array_name, masks):
-  """Checks whether mask dtype is uint8 and the values are either 0 or 1."""
-  if masks.dtype != np.uint8:
-    raise ValueError('{} must be of type np.uint8. Found {}.'.format(
-        array_name, masks.dtype))
-  if np.any(np.logical_and(masks != 0, masks != 1)):
-    raise ValueError('{} elements can only be either 0 or 1.'.format(
-        array_name))
+def convert_masks_to_binary(masks):
+  """Converts masks to 0 or 1 and uint8 type."""
+  return (masks > 0).astype(np.uint8)


 class CocoKeypointEvaluator(CocoDetectionEvaluator):
@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):

    groundtruth_instance_masks = groundtruth_dict[
        standard_fields.InputDataFields.groundtruth_instance_masks]
-    _check_mask_type_and_value(standard_fields.InputDataFields.
-                               groundtruth_instance_masks,
-                               groundtruth_instance_masks)
+    groundtruth_instance_masks = convert_masks_to_binary(
+        groundtruth_instance_masks)
    self._groundtruth_list.extend(
        coco_tools.
        ExportSingleImageGroundtruthToCoco(
@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
                       'are incompatible: {} vs {}'.format(
                           groundtruth_masks_shape,
                           detection_masks.shape))
-    _check_mask_type_and_value(standard_fields.DetectionResultFields.
-                               detection_masks,
-                               detection_masks)
+    detection_masks = convert_masks_to_binary(detection_masks)
    self._detection_masks_list.extend(
        coco_tools.ExportSingleImageDetectionMasksToCoco(
            image_id=image_id,

--- a/research/object_detection/metrics/coco_evaluation_test.py
+++ b/research/object_detection/metrics/coco_evaluation_test.py
@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
        image_id='image3',
        detections_dict={
            standard_fields.DetectionResultFields.detection_boxes:
-            np.array([[25., 25., 50., 50.]]),
+                np.array([[25., 25., 50., 50.]]),
            standard_fields.DetectionResultFields.detection_scores:
-            np.array([.8]),
+                np.array([.8]),
            standard_fields.DetectionResultFields.detection_classes:
-            np.array([1]),
+                np.array([1]),
            standard_fields.DetectionResultFields.detection_masks:
-            np.pad(np.ones([1, 25, 25], dtype=np.uint8),
-                   ((0, 0), (10, 10), (10, 10)), mode='constant')
+                # The value of 5 is equivalent to 1, since masks will be
+                # thresholded and binarized before evaluation.
+                np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
+                       ((0, 0), (10, 10), (10, 10)), mode='constant')
        })
    metrics = coco_evaluator.evaluate()
    self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)

--- a/research/object_detection/model_lib.py
+++ b/research/object_detection/model_lib.py
@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
 # pylint: disable=g-import-not-at-top
 try:
  from tensorflow.contrib import learn as contrib_learn
-  from tensorflow.contrib import tpu as contrib_tpu
 except ImportError:
  # TF 2.0 doesn't ship with contrib.
  pass
@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
        of groundtruth boxes per image..
      'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
        tensor of keypoints (if provided in groundtruth).
+      'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
+        with the number of DensePose points for each instance (if provided in
+        groundtruth).
+      'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
+        max_sampled_points] int32 tensor with the part ids for each DensePose
+        sampled point (if provided in groundtruth).
+      'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
+        max_sampled_points, 4] containing the DensePose surface coordinates for
+        each sampled point (if provided in groundtruth).
      'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
        group_of annotations (if provided in groundtruth).
      'groundtruth_labeled_classes': [batch_size, num_classes] int64
@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
      groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
          labeled_classes)

+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_num_points):
+    groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_num_points))
+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_part_ids):
+    groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_part_ids))
+  if detection_model.groundtruth_has_field(
+      fields.BoxListFields.densepose_surface_coords):
+    groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
+        detection_model.groundtruth_lists(
+            fields.BoxListFields.densepose_surface_coords))
  groundtruth[input_data_fields.num_groundtruth_boxes] = (
      tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
  return groundtruth
@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
        fields.InputDataFields.groundtruth_boxes,
        fields.InputDataFields.groundtruth_keypoints,
        fields.InputDataFields.groundtruth_keypoint_visibilities,
+        fields.InputDataFields.groundtruth_dp_num_points,
+        fields.InputDataFields.groundtruth_dp_part_ids,
+        fields.InputDataFields.groundtruth_dp_surface_coords,
        fields.InputDataFields.groundtruth_group_of,
        fields.InputDataFields.groundtruth_difficult,
        fields.InputDataFields.groundtruth_is_crowd,
@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
  if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
    gt_keypoint_visibilities_list = labels[
        fields.InputDataFields.groundtruth_keypoint_visibilities]
+  gt_dp_num_points_list = None
+  if fields.InputDataFields.groundtruth_dp_num_points in labels:
+    gt_dp_num_points_list = labels[
+        fields.InputDataFields.groundtruth_dp_num_points]
+  gt_dp_part_ids_list = None
+  if fields.InputDataFields.groundtruth_dp_part_ids in labels:
+    gt_dp_part_ids_list = labels[
+        fields.InputDataFields.groundtruth_dp_part_ids]
+  gt_dp_surface_coords_list = None
+  if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
+    gt_dp_surface_coords_list = labels[
+        fields.InputDataFields.groundtruth_dp_surface_coords]
  gt_weights_list = None
  if fields.InputDataFields.groundtruth_weights in labels:
    gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
      groundtruth_masks_list=gt_masks_list,
      groundtruth_keypoints_list=gt_keypoints_list,
      groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
+      groundtruth_dp_num_points_list=gt_dp_num_points_list,
+      groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
+      groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
      groundtruth_weights_list=gt_weights_list,
      groundtruth_is_crowd_list=gt_is_crowd_list,
      groundtruth_group_of_list=gt_group_of_list,
      groundtruth_area_list=gt_area_list)


-def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
+def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
                    postprocess_on_cpu=False):
  """Creates a model function for `Estimator`.

@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
    side_inputs = detection_model.get_side_inputs(features)

    if use_tpu and train_config.use_bfloat16:
-      with contrib_tpu.bfloat16_scope():
+      with tf.tpu.bfloat16_scope():
        prediction_dict = detection_model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape], **side_inputs)
@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,

    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
      if use_tpu and postprocess_on_cpu:
-        detections = contrib_tpu.outside_compilation(
+        detections = tf.tpu.outside_compilation(
            postprocess_wrapper,
            (prediction_dict,
             features[fields.InputDataFields.true_image_shape]))
@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,

    if mode == tf.estimator.ModeKeys.TRAIN:
      if use_tpu:
-        training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
+        training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,

    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
-      return contrib_tpu.TPUEstimatorSpec(
+      return tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,


 def create_estimator_and_inputs(run_config,
-                                hparams,
-                                pipeline_config_path,
+                                hparams=None,
+                                pipeline_config_path=None,
                                config_override=None,
                                train_steps=None,
                                sample_1_of_n_eval_examples=1,
@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,

  Args:
    run_config: A `RunConfig`.
-    hparams: A `HParams`.
+    hparams: (optional) A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
      override the config from `pipeline_config_path`.
@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
      model_config=model_config, predict_input_config=eval_input_configs[0])

  # Read export_to_tpu from hparams if not passed.
-  if export_to_tpu is None:
+  if export_to_tpu is None and hparams is not None:
    export_to_tpu = hparams.get('export_to_tpu', False)
  tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
                  use_tpu, export_to_tpu)
  model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
                              postprocess_on_cpu)
  if use_tpu_estimator:
-    estimator = contrib_tpu.TPUEstimator(
+    estimator = tf.estimator.tpu.TPUEstimator(
        model_fn=model_fn,
        train_batch_size=train_config.batch_size,
        # For each core, only batch size 1 is supported for eval.

--- a/research/object_detection/model_lib_v2.py
+++ b/research/object_detection/model_lib_v2.py
@@ -93,6 +93,12 @@ def _compute_losses_and_predictions_dicts(
          instance masks for objects.
        labels[fields.InputDataFields.groundtruth_keypoints] is a
          float32 tensor containing keypoints for each box.
+        labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
+          tensor with the number of sampled DensePose points per object.
+        labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
+          tensor with the DensePose part ids (0-indexed) per object.
+        labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+          float32 tensor with the DensePose surface coordinates.
        labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
          containing group_of annotations.
        labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
@@ -195,6 +201,17 @@ def eager_train_step(detection_model,
        labels[fields.InputDataFields.groundtruth_keypoints] is a
          [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
          keypoints for each box.
+        labels[fields.InputDataFields.groundtruth_dp_num_points] is a
+          [batch_size, num_boxes] int32 tensor with the number of DensePose
+          sampled points per instance.
+        labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
+          [batch_size, num_boxes, max_sampled_points] int32 tensor with the
+          part ids (0-indexed) for each instance.
+        labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
+          [batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
+          surface coordinates for each point. Each surface coordinate is of the
+          form (y, x, v, u) where (y, x) are normalized image locations and
+          (v, u) are part-relative normalized surface coordinates.
        labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
          k-hot tensor of classes.
    unpad_groundtruth_tensors: A parameter passed to unstack_batch.

--- a/research/object_detection/model_main.py
+++ b/research/object_detection/model_main.py
@@ -22,7 +22,6 @@ from absl import flags

 import tensorflow.compat.v1 as tf

-from object_detection import model_hparams
 from object_detection import model_lib

 flags.DEFINE_string(
@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
                     'one of every n train input examples for evaluation, '
                     'where n is provided. This is only used if '
                     '`eval_training_data` is True.')
-flags.DEFINE_string(
-    'hparams_overrides', None, 'Hyperparameter overrides, '
-    'represented as a string containing comma-separated '
-    'hparam_name=value pairs.')
 flags.DEFINE_string(
    'checkpoint_dir', None, 'Path to directory holding a checkpoint.  If '
    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
@@ -68,7 +63,6 @@ def main(unused_argv):

  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,

--- a/research/object_detection/model_main_tf2.py
+++ b/research/object_detection/model_main_tf2.py
@@ -83,9 +83,8 @@ def main(unused_argv):
        wait_interval=300, timeout=FLAGS.eval_timeout)
  else:
    if FLAGS.use_tpu:
-      if FLAGS.tpu_name is None:
-        raise ValueError('--tpu_name needs to be specified when use_tpu'
-                         ' is set.')
+      # TPU is automatically inferred if tpu_name is None and
+      # we are running under cloud ai-platform.
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
          FLAGS.tpu_name)
      tf.config.experimental_connect_to_cluster(resolver)

--- a/research/object_detection/model_tpu_main.py
+++ b/research/object_detection/model_tpu_main.py
@@ -26,18 +26,8 @@ from absl import flags
 import tensorflow.compat.v1 as tf


-from object_detection import model_hparams
 from object_detection import model_lib

-# pylint: disable=g-import-not-at-top
-try:
-  from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
-  from tensorflow.contrib import tpu as contrib_tpu
-except ImportError:
-  # TF 2.0 doesn't ship with contrib.
-  pass
-# pylint: enable=g-import-not-at-top
-
 tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

 # Cloud TPU Cluster Resolvers
@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
 flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
                     'this is not provided, batch size is read from training '
                     'config.')
-
-flags.DEFINE_string(
-    'hparams_overrides', None, 'Comma-separated list of '
-    'hyperparameters to override defaults.')
 flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
 flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
@@ -99,15 +85,15 @@ def main(unused_argv):
  flags.mark_flag_as_required('pipeline_config_path')

  tpu_cluster_resolver = (
-      contrib_cluster_resolver.TPUClusterResolver(
+      tf.distribute.cluster_resolver.TPUClusterResolver(
          tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
  tpu_grpc_url = tpu_cluster_resolver.get_master()

-  config = contrib_tpu.RunConfig(
+  config = tf.estimator.tpu.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
-      tpu_config=contrib_tpu.TPUConfig(
+      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))

@@ -117,7 +103,6 @@ def main(unused_argv):

  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,

--- a/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py
+++ b/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""SSD Keras-based EfficientNet + BiFPN (EfficientDet) Feature Extractor."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import logging
+from six.moves import range
+from six.moves import zip
+import tensorflow.compat.v2 as tf
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from object_detection.utils import tf_version
+# pylint: disable=g-import-not-at-top
+if tf_version.is_tf2():
+  from official.vision.image_classification.efficientnet import efficientnet_model
+
+# Maps a backbone feature level to the name of the EfficientNet layer whose
+# output is tapped for that level. The feature extractor below looks these
+# names up with `get_layer(...)` on the EfficientNet model, so only levels 1
+# through 5 can come directly from the backbone.
+_EFFICIENTNET_LEVEL_ENDPOINTS = {
+    1: 'stack_0/block_0/project_bn',
+    2: 'stack_1/block_1/add',
+    3: 'stack_2/block_1/add',
+    4: 'stack_4/block_2/add',
+    5: 'stack_6/block_0/project_bn',
+}
+
+
+class SSDEfficientNetBiFPNKerasFeatureExtractor(
+    ssd_meta_arch.SSDKerasFeatureExtractor):
+  """SSD Keras-based EfficientNetBiFPN (EfficientDet) Feature Extractor.
+
+  Runs an EfficientNet backbone, taps the intermediate layers named in
+  `_EFFICIENTNET_LEVEL_ENDPOINTS`, and feeds those feature maps through a
+  BiFPN stage.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level,
+               bifpn_max_level,
+               bifpn_num_iterations,
+               bifpn_num_filters,
+               bifpn_combine_method,
+               efficientnet_version,
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name=None):
+    """SSD Keras-based EfficientNetBiFPN (EfficientDet) feature extractor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN; any value other than
+        1.0 raises ValueError.
+      min_depth: minimum feature extractor depth. Used here as a lower bound
+        on `bifpn_num_filters`.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Backbone features are looked up in `_EFFICIENTNET_LEVEL_ENDPOINTS`,
+        which defines EfficientNet levels 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level up
+        to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Raised to `min_depth` when it is smaller than `min_depth`.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      efficientnet_version: the EfficientNet version to use for this feature
+        extractor's backbone.
+      use_explicit_padding: unsupported by EfficientNetBiFPN; a truthy value
+        raises ValueError.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. A truthy
+        value raises ValueError.
+      override_base_feature_extractor_hyperparams: unsupported; a truthy value
+        raises ValueError.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: if `depth_multiplier` is not 1.0, or if any of
+        `use_explicit_padding`, `use_depthwise` or
+        `override_base_feature_extractor_hyperparams` is truthy.
+    """
+    # `use_explicit_padding` and `use_depthwise` are rejected below whenever
+    # they are truthy, so None is always what reaches the parent class.
+    super(SSDEfficientNetBiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        use_explicit_padding=None,
+        use_depthwise=None,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+    if depth_multiplier != 1.0:
+      raise ValueError('EfficientNetBiFPN does not support a non-default '
+                       'depth_multiplier.')
+    if use_explicit_padding:
+      raise ValueError('EfficientNetBiFPN does not support explicit padding.')
+    if use_depthwise:
+      raise ValueError('EfficientNetBiFPN does not support use_depthwise.')
+    if override_base_feature_extractor_hyperparams:
+      raise ValueError('EfficientNetBiFPN does not support '
+                       'override_base_feature_extractor_hyperparams.')
+
+    self._bifpn_min_level = bifpn_min_level
+    self._bifpn_max_level = bifpn_max_level
+    self._bifpn_num_iterations = bifpn_num_iterations
+    self._bifpn_num_filters = max(bifpn_num_filters, min_depth)
+    self._bifpn_node_params = {'combine_method': bifpn_combine_method}
+    self._efficientnet_version = efficientnet_version
+
+    logging.info('EfficientDet EfficientNet backbone version: %s',
+                 self._efficientnet_version)
+    logging.info('EfficientDet BiFPN num filters: %d', self._bifpn_num_filters)
+    logging.info('EfficientDet BiFPN num iterations: %d',
+                 self._bifpn_num_iterations)
+
+    # The backbone only exposes the levels listed in
+    # _EFFICIENTNET_LEVEL_ENDPOINTS; anything above that is left to the BiFPN
+    # stage (see `input_max_level` in `build`).
+    self._backbone_max_level = min(
+        max(_EFFICIENTNET_LEVEL_ENDPOINTS), bifpn_max_level)
+    backbone_levels = list(
+        range(bifpn_min_level, self._backbone_max_level + 1))
+    self._output_layer_names = [
+        _EFFICIENTNET_LEVEL_ENDPOINTS[level] for level in backbone_levels]
+    self._output_layer_alias = [
+        'level_{}'.format(level) for level in backbone_levels]
+
+    # Initialize the EfficientNet backbone.
+    # Note, this is currently done in the init method rather than in the build
+    # method, since doing so introduces an error which is not well understood.
+    efficientnet_base = efficientnet_model.EfficientNet.from_name(
+        model_name=self._efficientnet_version,
+        overrides={'rescale_input': False})
+    outputs = [efficientnet_base.get_layer(output_layer_name).output
+               for output_layer_name in self._output_layer_names]
+    self._efficientnet = tf.keras.Model(
+        inputs=efficientnet_base.inputs, outputs=outputs)
+    self.classification_backbone = efficientnet_base
+    self._bifpn_stage = None
+
+  def build(self, input_shape):
+    """Creates the BiFPN stage and marks the layer as built.
+
+    Args:
+      input_shape: unused.
+    """
+    self._bifpn_stage = bifpn_generators.KerasBiFpnFeatureMaps(
+        bifpn_num_iterations=self._bifpn_num_iterations,
+        bifpn_num_filters=self._bifpn_num_filters,
+        fpn_min_level=self._bifpn_min_level,
+        fpn_max_level=self._bifpn_max_level,
+        input_max_level=self._backbone_max_level,
+        is_training=self._is_training,
+        conv_hyperparams=self._conv_hyperparams,
+        freeze_batchnorm=self._freeze_batchnorm,
+        bifpn_node_params=self._bifpn_node_params,
+        name='bifpn')
+    self.built = True
+
+  def preprocess(self, inputs):
+    """SSD preprocessing.
+
+    Channel-wise mean subtraction and scaling. Only applied when the input
+    has exactly 3 channels; any other input is returned unchanged.
+
+    Args:
+      inputs: a [batch, height, width, channels] float tensor representing a
+        batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    if inputs.shape.as_list()[3] == 3:
+      # Input images are expected to be in the range [0, 255].
+      # NOTE(review): these look like the usual ImageNet per-channel mean and
+      # stddev for inputs scaled to [0, 1] -- confirm against the checkpoint.
+      channel_offset = [0.485, 0.456, 0.406]
+      channel_scale = [0.229, 0.224, 0.225]
+      return ((inputs / 255.0) - [[channel_offset]]) / [[channel_scale]]
+    else:
+      return inputs
+
+  def _extract_features(self, preprocessed_inputs):
+    """Extract features from preprocessed inputs.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      feature_maps: a list of tensors where the ith tensor has shape
+        [batch, height_i, width_i, depth_i]
+    """
+    # NOTE(review): presumably enforces a minimum height/width of 129 pixels;
+    # confirm against shape_utils.check_min_image_dim.
+    preprocessed_inputs = shape_utils.check_min_image_dim(
+        129, preprocessed_inputs)
+
+    base_feature_maps = self._efficientnet(
+        ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
+
+    # `zip` comes from six.moves and is a one-shot iterator. Materialize it so
+    # the BiFPN layer receives a real list of (name, tensor) pairs that can be
+    # inspected and traversed more than once.
+    output_feature_map_dict = self._bifpn_stage(
+        list(zip(self._output_layer_alias, base_feature_maps)))
+
+    return list(output_feature_map_dict.values())
+
+
+class SSDEfficientNetB0BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=3,
+               bifpn_num_filters=64,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D0'):
+    """SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor.
+
+    Forwards every argument to the base class with the backbone fixed to
+    'efficientnet-b0'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN; the base class raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth. The base class uses it as a
+        lower bound on `bifpn_num_filters`.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Backbone features are looked up in `_EFFICIENTNET_LEVEL_ENDPOINTS`,
+        which defines EfficientNet levels 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level up
+        to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN; a truthy value
+        makes the base class raise ValueError.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. A truthy
+        value makes the base class raise ValueError.
+      override_base_feature_extractor_hyperparams: unsupported; a truthy value
+        makes the base class raise ValueError.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: raised by the base class when `depth_multiplier` is not 1.0
+        or when any of the unsupported options is truthy.
+    """
+    super(SSDEfficientNetB0BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b0',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB1BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=4,
+               bifpn_num_filters=88,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D1'):
+    """SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor.
+
+    Forwards every argument to the base class with the backbone fixed to
+    'efficientnet-b1'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN; the base class raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth. The base class uses it as a
+        lower bound on `bifpn_num_filters`.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Backbone features are looked up in `_EFFICIENTNET_LEVEL_ENDPOINTS`,
+        which defines EfficientNet levels 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level up
+        to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN; a truthy value
+        makes the base class raise ValueError.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. A truthy
+        value makes the base class raise ValueError.
+      override_base_feature_extractor_hyperparams: unsupported; a truthy value
+        makes the base class raise ValueError.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: raised by the base class when `depth_multiplier` is not 1.0
+        or when any of the unsupported options is truthy.
+    """
+    super(SSDEfficientNetB1BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b1',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB2BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=5,
+               bifpn_num_filters=112,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D2'):
+
+    """SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor.
+
+    Forwards every argument to the base class with the backbone fixed to
+    'efficientnet-b2'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN; the base class raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth. The base class uses it as a
+        lower bound on `bifpn_num_filters`.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Backbone features are looked up in `_EFFICIENTNET_LEVEL_ENDPOINTS`,
+        which defines EfficientNet levels 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level up
+        to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN; a truthy value
+        makes the base class raise ValueError.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. A truthy
+        value makes the base class raise ValueError.
+      override_base_feature_extractor_hyperparams: unsupported; a truthy value
+        makes the base class raise ValueError.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: raised by the base class when `depth_multiplier` is not 1.0
+        or when any of the unsupported options is truthy.
+    """
+    super(SSDEfficientNetB2BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b2',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB3BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=6,
+               bifpn_num_filters=160,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D3'):
+
+    """SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor.
+
+    Forwards every argument to the base class with the backbone fixed to
+    'efficientnet-b3'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN; the base class raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth. The base class uses it as a
+        lower bound on `bifpn_num_filters`.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Backbone features are looked up in `_EFFICIENTNET_LEVEL_ENDPOINTS`,
+        which defines EfficientNet levels 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level up
+        to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN; a truthy value
+        makes the base class raise ValueError.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. A truthy
+        value makes the base class raise ValueError.
+      override_base_feature_extractor_hyperparams: unsupported; a truthy value
+        makes the base class raise ValueError.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: raised by the base class when `depth_multiplier` is not 1.0
+        or when any of the unsupported options is truthy.
+    """
+    super(SSDEfficientNetB3BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b3',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB4BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=7,
+               bifpn_num_filters=224,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D4'):
+
+    """SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor.
+
+    Forwards every argument to the base class with the backbone fixed to
+    'efficientnet-b4'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN; the base class raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth. The base class uses it as a
+        lower bound on `bifpn_num_filters`.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Backbone features are looked up in `_EFFICIENTNET_LEVEL_ENDPOINTS`,
+        which defines EfficientNet levels 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level up
+        to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN; a truthy value
+        makes the base class raise ValueError.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. A truthy
+        value makes the base class raise ValueError.
+      override_base_feature_extractor_hyperparams: unsupported; a truthy value
+        makes the base class raise ValueError.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: raised by the base class when `depth_multiplier` is not 1.0
+        or when any of the unsupported options is truthy.
+    """
+    super(SSDEfficientNetB4BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b4',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB5BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor.
+
+  Subclass of SSDEfficientNetBiFPNKerasFeatureExtractor whose constructor
+  always passes efficientnet_version='efficientnet-b5' to the base class.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=7,
+               bifpn_num_filters=288,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D5'):
+
+    """SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false, the train op must add a control
+        dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+        update batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively. NOTE(review): the "Resnet blocks" wording appears to be
+        carried over from a Resnet-based extractor; confirm the mapping for
+        the EfficientNet backbone.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations. Overridden if
+        efficientdet_version is provided. NOTE(review): this constructor has
+        no efficientdet_version argument; confirm whether this still applies.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Overridden if efficientdet_version is provided. NOTE(review): see the
+        note on bifpn_num_iterations.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    # The backbone is pinned to 'efficientnet-b5' by this subclass; every other
+    # argument is forwarded unchanged to the base class constructor.
+    super(SSDEfficientNetB5BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b5',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB6BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor.
+
+  Subclass of SSDEfficientNetBiFPNKerasFeatureExtractor whose constructor
+  always passes efficientnet_version='efficientnet-b6' to the base class.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=8,
+               bifpn_num_filters=384,
+               bifpn_combine_method='sum',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D6-D7'):
+
+    """SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor.
+
+    SSD Keras EfficientNet-b6 BiFPN Feature Extractor, a.k.a. EfficientDet-d6
+    and EfficientDet-d7. The EfficientDet-d[6,7] models use the same backbone
+    EfficientNet-b6 and the same BiFPN architecture, and therefore have the same
+    number of parameters. They only differ in their input resolutions.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false, the train op must add a control
+        dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+        update batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively. NOTE(review): the "Resnet blocks" wording appears to be
+        carried over from a Resnet-based extractor; confirm the mapping for
+        the EfficientNet backbone.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations. Overridden if
+        efficientdet_version is provided. NOTE(review): this constructor has
+        no efficientdet_version argument; confirm whether this still applies.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Overridden if efficientdet_version is provided. NOTE(review): see the
+        note on bifpn_num_iterations.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    # The backbone is pinned to 'efficientnet-b6' by this subclass; every other
+    # argument is forwarded unchanged to the base class constructor.
+    super(SSDEfficientNetB6BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b6',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB7BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b7 BiFPN Feature Extractor.
+
+  Subclass of SSDEfficientNetBiFPNKerasFeatureExtractor whose constructor
+  always passes efficientnet_version='efficientnet-b7' to the base class.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=8,
+               bifpn_num_filters=384,
+               bifpn_combine_method='sum',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientNet-B7_BiFPN'):
+
+    """SSD Keras EfficientNet-b7 BiFPN Feature Extractor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false, the train op must add a control
+        dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+        update batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively. NOTE(review): the "Resnet blocks" wording appears to be
+        carried over from a Resnet-based extractor; confirm the mapping for
+        the EfficientNet backbone.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations. Overridden if
+        efficientdet_version is provided. NOTE(review): this constructor has
+        no efficientdet_version argument; confirm whether this still applies.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Overridden if efficientdet_version is provided. NOTE(review): see the
+        note on bifpn_num_iterations.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    # The backbone is pinned to 'efficientnet-b7' by this subclass; every other
+    # argument is forwarded unchanged to the base class constructor.
+    super(SSDEfficientNetB7BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b7',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
--- a/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor_tf2_test.py
+++ b/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor_tf2_test.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the ssd_efficientnet_bifpn_feature_extractor."""
+import unittest
+from absl.testing import parameterized
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+
+from google.protobuf import text_format
+from object_detection.builders import hyperparams_builder
+from object_detection.models import ssd_efficientnet_bifpn_feature_extractor
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+
+def _count_params(model, trainable_only=True):
+  """Returns the count of all model parameters, or just trainable ones.
+
+  Args:
+    model: object exposing `count_params()` and `trainable_weights`
+      (presumably a tf.keras model or layer -- TODO confirm).
+    trainable_only: if True, count only the entries of
+      `model.trainable_weights`; otherwise return `model.count_params()`.
+
+  Returns:
+    The parameter count. An int when `trainable_only` is True; otherwise
+    whatever `model.count_params()` returns.
+  """
+  if not trainable_only:
+    return model.count_params()
+  else:
+    # Sum the per-variable sizes of the trainable weights only.
+    return int(np.sum([
+        tf.keras.backend.count_params(p) for p in model.trainable_weights]))
+
+
+@parameterized.parameters(
+    {'efficientdet_version': 'efficientdet-d0',
+     'efficientnet_version': 'efficientnet-b0',
+     'bifpn_num_iterations': 3,
+     'bifpn_num_filters': 64,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d1',
+     'efficientnet_version': 'efficientnet-b1',
+     'bifpn_num_iterations': 4,
+     'bifpn_num_filters': 88,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d2',
+     'efficientnet_version': 'efficientnet-b2',
+     'bifpn_num_iterations': 5,
+     'bifpn_num_filters': 112,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d3',
+     'efficientnet_version': 'efficientnet-b3',
+     'bifpn_num_iterations': 6,
+     'bifpn_num_filters': 160,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d4',
+     'efficientnet_version': 'efficientnet-b4',
+     'bifpn_num_iterations': 7,
+     'bifpn_num_filters': 224,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d5',
+     'efficientnet_version': 'efficientnet-b5',
+     'bifpn_num_iterations': 7,
+     'bifpn_num_filters': 288,
+     'bifpn_combine_method': 'fast_attention'},
+    # efficientdet-d6 and efficientdet-d7 only differ in input size.
+    {'efficientdet_version': 'efficientdet-d6-d7',
+     'efficientnet_version': 'efficientnet-b6',
+     'bifpn_num_iterations': 8,
+     'bifpn_num_filters': 384,
+     'bifpn_combine_method': 'sum'})
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SSDEfficientNetBiFPNFeatureExtractorTest(
+    test_case.TestCase, parameterized.TestCase):
+  """Shape and parameter-count tests for the EfficientNet BiFPN extractor.
+
+  The class-level parameters above pair an EfficientDet variant name with the
+  EfficientNet backbone and BiFPN settings used to build the feature extractor
+  under test; each test method takes those values as keyword arguments.
+  """
+
+  def _build_conv_hyperparams(self, add_batch_norm=True):
+    """Builds Keras conv hyperparams from an inline text proto.
+
+    Args:
+      add_batch_norm: if True, a `batch_norm` section is appended to the text
+        proto before it is parsed.
+
+    Returns:
+      A `hyperparams_builder.KerasLayerHyperparams` wrapping the parsed
+      `hyperparams_pb2.Hyperparams` message.
+    """
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      force_use_bias: true
+      activation: SWISH
+      regularizer {
+        l2_regularizer {
+          weight: 0.0004
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+          stddev: 0.03
+          mean: 0.0
+        }
+      }
+    """
+    if add_batch_norm:
+      batch_norm_proto = """
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      """
+      conv_hyperparams_text_proto += batch_norm_proto
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def _create_feature_extractor(self,
+                                efficientnet_version='efficientnet-b0',
+                                bifpn_num_iterations=3,
+                                bifpn_num_filters=64,
+                                bifpn_combine_method='fast_attention'):
+    """Constructs a new EfficientNetBiFPN feature extractor.
+
+    The extractor is always built in training mode with batch norm unfrozen,
+    BiFPN levels 3 through 7, and the hyperparams returned by
+    `_build_conv_hyperparams()`.
+
+    Args:
+      efficientnet_version: backbone name forwarded to the extractor.
+      bifpn_num_iterations: number of BiFPN iterations forwarded to the
+        extractor.
+      bifpn_num_filters: number of BiFPN filters forwarded to the extractor.
+      bifpn_combine_method: BiFPN combine method forwarded to the extractor.
+
+    Returns:
+      An `SSDEfficientNetBiFPNKerasFeatureExtractor` instance.
+    """
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    min_depth = 16
+    return (ssd_efficientnet_bifpn_feature_extractor
+            .SSDEfficientNetBiFPNKerasFeatureExtractor(
+                is_training=True,
+                depth_multiplier=depth_multiplier,
+                min_depth=min_depth,
+                pad_to_multiple=pad_to_multiple,
+                conv_hyperparams=self._build_conv_hyperparams(),
+                freeze_batchnorm=False,
+                inplace_batchnorm_update=False,
+                bifpn_min_level=3,
+                bifpn_max_level=7,
+                bifpn_num_iterations=bifpn_num_iterations,
+                bifpn_num_filters=bifpn_num_filters,
+                bifpn_combine_method=bifpn_combine_method,
+                efficientnet_version=efficientnet_version))
+
+  def test_efficientdet_feature_extractor_shapes(self,
+                                                 efficientdet_version,
+                                                 efficientnet_version,
+                                                 bifpn_num_iterations,
+                                                 bifpn_num_filters,
+                                                 bifpn_combine_method):
+    """Checks the shapes of the five feature maps for a 256x256 input.
+
+    `efficientdet_version` is not used by this test; it is accepted because
+    the class-level parameters supply it.
+    """
+    feature_extractor = self._create_feature_extractor(
+        efficientnet_version=efficientnet_version,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method)
+    outputs = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
+
+    # Spatial size halves at each output (32 down to 2), which is consistent
+    # with levels 3..7 being 256 / 2**level; all outputs share
+    # bifpn_num_filters channels.
+    self.assertEqual(outputs[0].shape, (2, 32, 32, bifpn_num_filters))
+    self.assertEqual(outputs[1].shape, (2, 16, 16, bifpn_num_filters))
+    self.assertEqual(outputs[2].shape, (2, 8, 8, bifpn_num_filters))
+    self.assertEqual(outputs[3].shape, (2, 4, 4, bifpn_num_filters))
+    self.assertEqual(outputs[4].shape, (2, 2, 2, bifpn_num_filters))
+
+  def test_efficientdet_feature_extractor_params(self,
+                                                 efficientdet_version,
+                                                 efficientnet_version,
+                                                 bifpn_num_iterations,
+                                                 bifpn_num_filters,
+                                                 bifpn_combine_method):
+    """Checks the trainable parameter count of each EfficientDet variant."""
+    feature_extractor = self._create_feature_extractor(
+        efficientnet_version=efficientnet_version,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method)
+    # Run one forward pass before counting; presumably this is what creates
+    # the extractor's weights (TODO confirm).
+    _ = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
+    # Expected trainable-parameter counts, keyed by efficientdet_version.
+    expected_params = {
+        'efficientdet-d0': 5484829,
+        'efficientdet-d1': 8185156,
+        'efficientdet-d2': 9818153,
+        'efficientdet-d3': 13792706,
+        'efficientdet-d4': 22691445,
+        'efficientdet-d5': 35795677,
+        'efficientdet-d6-d7': 53624512,
+    }
+    num_params = _count_params(feature_extractor)
+    self.assertEqual(expected_params[efficientdet_version], num_params)
+
+
+if __name__ == '__main__':
+  # Hand control to the TensorFlow test runner when executed as a script.
+  tf.test.main()
--- a/research/object_detection/predictors/heads/head.py
+++ b/research/object_detection/predictors/heads/head.py
@@ -61,7 +61,7 @@ class Head(object):
    pass


-class KerasHead(tf.keras.Model):
+class KerasHead(tf.keras.layers.Layer):
  """Keras head base class."""

  def call(self, features):

--- a/research/object_detection/protos/ssd.proto
+++ b/research/object_detection/protos/ssd.proto
@@ -145,7 +145,7 @@ message Ssd {
  optional MaskHead mask_head_config = 25;
 }

-// Next id: 19.
+// Next id: 20.
 message SsdFeatureExtractor {
  reserved 6;

@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default = false];

-  // Feature Pyramid Networks config.
-  optional FeaturePyramidNetworks fpn = 10;
+  oneof feature_pyramid_oneof {
+    // Feature Pyramid Networks config.
+    FeaturePyramidNetworks fpn = 10;
+
+    // Bidirectional Feature Pyramid Networks config.
+    BidirectionalFeaturePyramidNetworks bifpn = 19;
+  }

  // If true, replace preprocess function of feature extractor with a
  // placeholder. This should only be used if all the image preprocessing steps
@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {

 }

+// Configuration for Bidirectional Feature Pyramid Networks.
+message BidirectionalFeaturePyramidNetworks {
+  // Minimum level in the feature pyramid.
+  optional int32 min_level = 1 [default = 3];
+
+  // Maximum level in the feature pyramid.
+  optional int32 max_level = 2 [default = 7];
+
+  // The number of repeated top-down bottom-up iterations for BiFPN-based
+  // feature extractors (bidirectional feature pyramid networks).
+  optional int32 num_iterations = 3;
+
+  // The number of filters (channels) to use in feature pyramid layers for
+  // BiFPN-based feature extractors (bidirectional feature pyramid networks).
+  optional int32 num_filters = 4;
+
+  // Method used to combine inputs to BiFPN nodes.
+  optional string combine_method = 5 [default = 'fast_attention'];
+}
+