Merge branch 'master' of https://github.com/tensorflow/models into context_tf2

3ce2f61b · Kaushik Shivakumar · bb16d5ca · 8e9296ff · 3ce2f61b · 3ce2f61b
Commit 3ce2f61b authored Jul 12, 2020 by Kaushik Shivakumar
20 changed files
--- a/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor_tf2_test.py
+++ b/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor_tf2_test.py
@@ -21,8 +21,8 @@ from google.protobuf import text_format

 from object_detection.builders import hyperparams_builder
 from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
-from object_detection.utils import tf_version
 from object_detection.protos import hyperparams_pb2
+from object_detection.utils import tf_version


 @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
        }
      }
    """
-    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

  def _build_feature_extractor(self):

--- a/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py
+++ b/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""SSD Keras-based EfficientNet + BiFPN (EfficientDet) Feature Extractor."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import logging
+from six.moves import range
+from six.moves import zip
+import tensorflow.compat.v2 as tf
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from object_detection.utils import tf_version
+# pylint: disable=g-import-not-at-top
+if tf_version.is_tf2():
+  from official.vision.image_classification.efficientnet import efficientnet_model
+
+# Maps a feature level (integer key) to the name of the EfficientNet layer
+# whose output is tapped as the backbone feature map for that level. The base
+# feature extractor resolves these names with `get_layer(...)` and uses the
+# largest key (5) as the deepest level the backbone itself can provide.
+_EFFICIENTNET_LEVEL_ENDPOINTS = {
+    1: 'stack_0/block_0/project_bn',
+    2: 'stack_1/block_1/add',
+    3: 'stack_2/block_1/add',
+    4: 'stack_4/block_2/add',
+    5: 'stack_6/block_0/project_bn',
+}
+
+
+class SSDEfficientNetBiFPNKerasFeatureExtractor(
+    ssd_meta_arch.SSDKerasFeatureExtractor):
+  """SSD Keras-based EfficientNetBiFPN (EfficientDet) Feature Extractor.
+
+  Builds an EfficientNet backbone, taps the intermediate layers listed in
+  `_EFFICIENTNET_LEVEL_ENDPOINTS`, and passes those feature maps to a BiFPN
+  stage (`bifpn_generators.KerasBiFpnFeatureMaps`).
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level,
+               bifpn_max_level,
+               bifpn_num_iterations,
+               bifpn_num_filters,
+               bifpn_combine_method,
+               efficientnet_version,
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name=None):
+    """SSD Keras-based EfficientNetBiFPN (EfficientDet) feature extractor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor. Must be 1.0.
+      min_depth: minimum feature extractor depth. Also acts as a lower bound
+        on the number of BiFPN filters (see bifpn_num_filters).
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Every level from bifpn_min_level up to min(bifpn_max_level, 5) is
+        read from the backbone, so each of those levels must be a key of
+        `_EFFICIENTNET_LEVEL_ENDPOINTS` (1 through 5).
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        The value actually used is max(bifpn_num_filters, min_depth).
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      efficientnet_version: the EfficientNet version to use for this feature
+        extractor's backbone. Passed as `model_name` to
+        `efficientnet_model.EfficientNet.from_name`.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features. Must be falsy; the parent
+        class always receives None for this argument.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. Must be
+        falsy; the parent class always receives None for this argument.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`. Must be falsy.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+
+    Raises:
+      ValueError: if depth_multiplier is not 1.0, or if use_explicit_padding,
+        use_depthwise or override_base_feature_extractor_hyperparams is
+        truthy.
+    """
+    super(SSDEfficientNetBiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        use_explicit_padding=None,
+        use_depthwise=None,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+    # Reject every option this extractor does not implement. Note that these
+    # checks run after the parent constructor has already been called.
+    if depth_multiplier != 1.0:
+      raise ValueError('EfficientNetBiFPN does not support a non-default '
+                       'depth_multiplier.')
+    if use_explicit_padding:
+      raise ValueError('EfficientNetBiFPN does not support explicit padding.')
+    if use_depthwise:
+      raise ValueError('EfficientNetBiFPN does not support use_depthwise.')
+    if override_base_feature_extractor_hyperparams:
+      raise ValueError('EfficientNetBiFPN does not support '
+                       'override_base_feature_extractor_hyperparams.')
+
+    self._bifpn_min_level = bifpn_min_level
+    self._bifpn_max_level = bifpn_max_level
+    self._bifpn_num_iterations = bifpn_num_iterations
+    # min_depth is a floor on the BiFPN width.
+    self._bifpn_num_filters = max(bifpn_num_filters, min_depth)
+    self._bifpn_node_params = {'combine_method': bifpn_combine_method}
+    self._efficientnet_version = efficientnet_version
+
+    logging.info('EfficientDet EfficientNet backbone version: %s',
+                 self._efficientnet_version)
+    logging.info('EfficientDet BiFPN num filters: %d', self._bifpn_num_filters)
+    logging.info('EfficientDet BiFPN num iterations: %d',
+                 self._bifpn_num_iterations)
+
+    # The backbone only exposes the levels present in
+    # _EFFICIENTNET_LEVEL_ENDPOINTS, so cap the deepest backbone level there.
+    self._backbone_max_level = min(
+        max(_EFFICIENTNET_LEVEL_ENDPOINTS.keys()), bifpn_max_level)
+    # Backbone layer names to tap, and the 'level_<i>' aliases they are paired
+    # with when handed to the BiFPN stage; both lists are in level order.
+    self._output_layer_names = [
+        _EFFICIENTNET_LEVEL_ENDPOINTS[i]
+        for i in range(bifpn_min_level, self._backbone_max_level + 1)]
+    self._output_layer_alias = [
+        'level_{}'.format(i)
+        for i in range(bifpn_min_level, self._backbone_max_level + 1)]
+
+    # Initialize the EfficientNet backbone.
+    # Note, this is currently done in the init method rather than in the build
+    # method, since doing so introduces an error which is not well understood.
+    efficientnet_base = efficientnet_model.EfficientNet.from_name(
+        model_name=self._efficientnet_version,
+        overrides={'rescale_input': False})
+    outputs = [efficientnet_base.get_layer(output_layer_name).output
+               for output_layer_name in self._output_layer_names]
+    # Multi-output model that returns only the tapped endpoints.
+    self._efficientnet = tf.keras.Model(
+        inputs=efficientnet_base.inputs, outputs=outputs)
+    # NOTE(review): keeps a handle on the full classification model —
+    # presumably for restoring classification checkpoints; confirm with callers.
+    self.classification_backbone = efficientnet_base
+    # Created lazily in build().
+    self._bifpn_stage = None
+
+  def build(self, input_shape):
+    """Creates the BiFPN stage and marks this layer as built.
+
+    Args:
+      input_shape: unused.
+    """
+    self._bifpn_stage = bifpn_generators.KerasBiFpnFeatureMaps(
+        bifpn_num_iterations=self._bifpn_num_iterations,
+        bifpn_num_filters=self._bifpn_num_filters,
+        fpn_min_level=self._bifpn_min_level,
+        fpn_max_level=self._bifpn_max_level,
+        input_max_level=self._backbone_max_level,
+        is_training=self._is_training,
+        conv_hyperparams=self._conv_hyperparams,
+        freeze_batchnorm=self._freeze_batchnorm,
+        bifpn_node_params=self._bifpn_node_params,
+        name='bifpn')
+    self.built = True
+
+  def preprocess(self, inputs):
+    """SSD preprocessing.
+
+    For inputs with exactly 3 channels: divides by 255, subtracts the
+    per-channel offset and divides by the per-channel scale. Inputs with any
+    other channel count are returned unchanged.
+
+    Args:
+      inputs: a [batch, height, width, channels] float tensor representing a
+        batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    if inputs.shape.as_list()[3] == 3:
+      # Input images are expected to be in the range [0, 255].
+      channel_offset = [0.485, 0.456, 0.406]
+      channel_scale = [0.229, 0.224, 0.225]
+      # The nested lists broadcast the per-channel constants over the last
+      # (channel) axis.
+      return ((inputs / 255.0) - [[channel_offset]]) / [[channel_scale]]
+    else:
+      return inputs
+
+  def _extract_features(self, preprocessed_inputs):
+    """Extract features from preprocessed inputs.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      feature_maps: a list of tensors where the ith tensor has shape
+        [batch, height_i, width_i, depth_i]
+    """
+    # NOTE(review): presumably enforces a minimum spatial size of 129 —
+    # semantics live in shape_utils.check_min_image_dim; confirm there.
+    preprocessed_inputs = shape_utils.check_min_image_dim(
+        129, preprocessed_inputs)
+
+    base_feature_maps = self._efficientnet(
+        ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
+
+    # Pair each backbone output with its 'level_<i>' alias for the BiFPN stage.
+    output_feature_map_dict = self._bifpn_stage(
+        list(zip(self._output_layer_alias, base_feature_maps)))
+
+    return list(output_feature_map_dict.values())
+
+
+class SSDEfficientNetB0BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=3,
+               bifpn_num_filters=64,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D0'):
+    """SSD Keras EfficientNet-b0 BiFPN (EfficientDet-d0) Feature Extractor.
+
+    Forwards every argument to the parent constructor with
+    efficientnet_version fixed to 'efficientnet-b0'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor. The parent constructor raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Levels taken from the backbone are looked up in
+        `_EFFICIENTNET_LEVEL_ENDPOINTS`, whose keys are 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features. The parent constructor
+        raises ValueError if this is truthy.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. The parent
+        constructor raises ValueError if this is truthy.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`. The parent constructor raises ValueError if this
+        is truthy.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    super(SSDEfficientNetB0BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b0',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB1BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=4,
+               bifpn_num_filters=88,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D1'):
+    """SSD Keras EfficientNet-b1 BiFPN (EfficientDet-d1) Feature Extractor.
+
+    Forwards every argument to the parent constructor with
+    efficientnet_version fixed to 'efficientnet-b1'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor. The parent constructor raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Levels taken from the backbone are looked up in
+        `_EFFICIENTNET_LEVEL_ENDPOINTS`, whose keys are 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features. The parent constructor
+        raises ValueError if this is truthy.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. The parent
+        constructor raises ValueError if this is truthy.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`. The parent constructor raises ValueError if this
+        is truthy.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    super(SSDEfficientNetB1BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b1',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB2BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=5,
+               bifpn_num_filters=112,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D2'):
+
+    """SSD Keras EfficientNet-b2 BiFPN (EfficientDet-d2) Feature Extractor.
+
+    Forwards every argument to the parent constructor with
+    efficientnet_version fixed to 'efficientnet-b2'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor. The parent constructor raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Levels taken from the backbone are looked up in
+        `_EFFICIENTNET_LEVEL_ENDPOINTS`, whose keys are 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features. The parent constructor
+        raises ValueError if this is truthy.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. The parent
+        constructor raises ValueError if this is truthy.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`. The parent constructor raises ValueError if this
+        is truthy.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    super(SSDEfficientNetB2BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b2',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB3BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=6,
+               bifpn_num_filters=160,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D3'):
+
+    """SSD Keras EfficientNet-b3 BiFPN (EfficientDet-d3) Feature Extractor.
+
+    Forwards every argument to the parent constructor with
+    efficientnet_version fixed to 'efficientnet-b3'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor. The parent constructor raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Levels taken from the backbone are looked up in
+        `_EFFICIENTNET_LEVEL_ENDPOINTS`, whose keys are 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features. The parent constructor
+        raises ValueError if this is truthy.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. The parent
+        constructor raises ValueError if this is truthy.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`. The parent constructor raises ValueError if this
+        is truthy.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    super(SSDEfficientNetB3BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b3',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB4BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=7,
+               bifpn_num_filters=224,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D4'):
+
+    """SSD Keras EfficientNet-b4 BiFPN (EfficientDet-d4) Feature Extractor.
+
+    Forwards every argument to the parent constructor with
+    efficientnet_version fixed to 'efficientnet-b4'.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor. The parent constructor raises
+        ValueError for any value other than 1.0.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN.
+        Levels taken from the backbone are looked up in
+        `_EFFICIENTNET_LEVEL_ENDPOINTS`, whose keys are 1 through 5.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations.
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features. The parent constructor
+        raises ValueError if this is truthy.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and use separable convolutions after combine operations. The parent
+        constructor raises ValueError if this is truthy.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`. The parent constructor raises ValueError if this
+        is truthy.
+      name: a string name scope to assign to the model. If 'None', Keras will
+        auto-generate one from the class name.
+    """
+    super(SSDEfficientNetB4BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b4',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB5BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor.
+
+  Thin subclass: the constructor forwards every argument unchanged to
+  `SSDEfficientNetBiFPNKerasFeatureExtractor` and fixes
+  `efficientnet_version` to 'efficientnet-b5'.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=7,
+               bifpn_num_filters=288,
+               bifpn_combine_method='fast_attention',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D5'):
+
+    """SSD Keras EfficientNet-b5 BiFPN (EfficientDet-d5) Feature Extractor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.GraphKeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to the bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations. Overridden if
+        efficientdet_version is provided (TODO confirm: no such argument is
+        visible in this constructor).
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Overridden if efficientdet_version is provided (TODO confirm: no such
+        argument is visible in this constructor).
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and uses separable convolutions after combine operations.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: a string name scope to assign to the model. If None, Keras will
+        auto-generate one from the class name.
+    """
+    # Only the backbone version is fixed here; all other arguments are passed
+    # through to the base class unchanged.
+    super(SSDEfficientNetB5BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b5',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB6BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor.
+
+  Thin subclass: the constructor forwards every argument unchanged to
+  `SSDEfficientNetBiFPNKerasFeatureExtractor` and fixes
+  `efficientnet_version` to 'efficientnet-b6'.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=8,
+               bifpn_num_filters=384,
+               bifpn_combine_method='sum',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientDet-D6-D7'):
+
+    """SSD Keras EfficientNet-b6 BiFPN (EfficientDet-d[6,7]) Feature Extractor.
+
+    SSD Keras EfficientNet-b6 BiFPN Feature Extractor, a.k.a. EfficientDet-d6
+    and EfficientDet-d7. The EfficientDet-d[6,7] models use the same backbone
+    EfficientNet-b6 and the same BiFPN architecture, and therefore have the same
+    number of parameters. They only differ in their input resolutions.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.GraphKeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to the bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations. Overridden if
+        efficientdet_version is provided (TODO confirm: no such argument is
+        visible in this constructor).
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Overridden if efficientdet_version is provided (TODO confirm: no such
+        argument is visible in this constructor).
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and uses separable convolutions after combine operations.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: a string name scope to assign to the model. If None, Keras will
+        auto-generate one from the class name.
+    """
+    # Only the backbone version is fixed here; all other arguments are passed
+    # through to the base class unchanged.
+    super(SSDEfficientNetB6BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b6',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
+
+
+class SSDEfficientNetB7BiFPNKerasFeatureExtractor(
+    SSDEfficientNetBiFPNKerasFeatureExtractor):
+  """SSD Keras EfficientNet-b7 BiFPN Feature Extractor.
+
+  Thin subclass: the constructor forwards every argument unchanged to
+  `SSDEfficientNetBiFPNKerasFeatureExtractor` and fixes
+  `efficientnet_version` to 'efficientnet-b7'.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               freeze_batchnorm,
+               inplace_batchnorm_update,
+               bifpn_min_level=3,
+               bifpn_max_level=7,
+               bifpn_num_iterations=8,
+               bifpn_num_filters=384,
+               bifpn_combine_method='sum',
+               use_explicit_padding=None,
+               use_depthwise=None,
+               override_base_feature_extractor_hyperparams=None,
+               name='EfficientNet-B7_BiFPN'):
+
+    """SSD Keras EfficientNet-b7 BiFPN Feature Extractor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: unsupported by EfficientNetBiFPN. float, depth
+        multiplier for the feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
+      freeze_batchnorm: whether to freeze batch norm parameters during training
+        or not. When training with a small batch size (e.g. 1), it is desirable
+        to freeze batch norm update and use pretrained batch norm params.
+      inplace_batchnorm_update: whether to update batch norm moving average
+        values inplace. When this is false train op must add a control
+        dependency on tf.GraphKeys.UPDATE_OPS collection in order to update
+        batch norm statistics.
+      bifpn_min_level: the highest resolution feature map to use in BiFPN. The
+        valid values are {2, 3, 4, 5} which map to Resnet blocks {1, 2, 3, 4}
+        respectively.
+      bifpn_max_level: the smallest resolution feature map to use in the BiFPN.
+        BiFPN construction uses feature maps starting from bifpn_min_level
+        up to the bifpn_max_level. In the case that there are not enough feature
+        maps in the backbone network, additional feature maps are created by
+        applying stride 2 convolutions until we get the desired number of BiFPN
+        levels.
+      bifpn_num_iterations: number of BiFPN iterations. Overridden if
+        efficientdet_version is provided (TODO confirm: no such argument is
+        visible in this constructor).
+      bifpn_num_filters: number of filters (channels) in all BiFPN layers.
+        Overridden if efficientdet_version is provided (TODO confirm: no such
+        argument is visible in this constructor).
+      bifpn_combine_method: the method used to combine BiFPN nodes.
+      use_explicit_padding: unsupported by EfficientNetBiFPN. Whether to use
+        explicit padding when extracting features.
+      use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular
+        convolutions when inputs to a node have a differing number of channels,
+        and uses separable convolutions after combine operations.
+      override_base_feature_extractor_hyperparams: unsupported. Whether to
+        override hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams`.
+      name: a string name scope to assign to the model. If None, Keras will
+        auto-generate one from the class name.
+    """
+    # Only the backbone version is fixed here; all other arguments are passed
+    # through to the base class unchanged.
+    super(SSDEfficientNetB7BiFPNKerasFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams=conv_hyperparams,
+        freeze_batchnorm=freeze_batchnorm,
+        inplace_batchnorm_update=inplace_batchnorm_update,
+        bifpn_min_level=bifpn_min_level,
+        bifpn_max_level=bifpn_max_level,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method,
+        efficientnet_version='efficientnet-b7',
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=
+        override_base_feature_extractor_hyperparams,
+        name=name)
--- a/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor_tf2_test.py
+++ b/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor_tf2_test.py
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the ssd_efficientnet_bifpn_feature_extractor."""
+import unittest
+from absl.testing import parameterized
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+
+from google.protobuf import text_format
+from object_detection.builders import hyperparams_builder
+from object_detection.models import ssd_efficientnet_bifpn_feature_extractor
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+
+def _count_params(model, trainable_only=True):
+  """Returns the count of all model parameters, or just trainable ones.
+
+  Args:
+    model: object exposing `count_params()` and `trainable_weights`
+      (presumably a Keras model or layer; the tests in this file pass a
+      feature extractor).
+    trainable_only: if True (the default), only the weights listed in
+      `model.trainable_weights` are counted; otherwise the result of
+      `model.count_params()` is returned as-is.
+
+  Returns:
+    The parameter count. In the trainable-only case this is the sum of the
+    per-weight counts, converted to a Python int.
+  """
+  if not trainable_only:
+    return model.count_params()
+  else:
+    return int(np.sum([
+        tf.keras.backend.count_params(p) for p in model.trainable_weights]))
+
+
+@parameterized.parameters(
+    {'efficientdet_version': 'efficientdet-d0',
+     'efficientnet_version': 'efficientnet-b0',
+     'bifpn_num_iterations': 3,
+     'bifpn_num_filters': 64,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d1',
+     'efficientnet_version': 'efficientnet-b1',
+     'bifpn_num_iterations': 4,
+     'bifpn_num_filters': 88,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d2',
+     'efficientnet_version': 'efficientnet-b2',
+     'bifpn_num_iterations': 5,
+     'bifpn_num_filters': 112,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d3',
+     'efficientnet_version': 'efficientnet-b3',
+     'bifpn_num_iterations': 6,
+     'bifpn_num_filters': 160,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d4',
+     'efficientnet_version': 'efficientnet-b4',
+     'bifpn_num_iterations': 7,
+     'bifpn_num_filters': 224,
+     'bifpn_combine_method': 'fast_attention'},
+    {'efficientdet_version': 'efficientdet-d5',
+     'efficientnet_version': 'efficientnet-b5',
+     'bifpn_num_iterations': 7,
+     'bifpn_num_filters': 288,
+     'bifpn_combine_method': 'fast_attention'},
+    # efficientdet-d6 and efficientdet-d7 only differ in input size.
+    {'efficientdet_version': 'efficientdet-d6-d7',
+     'efficientnet_version': 'efficientnet-b6',
+     'bifpn_num_iterations': 8,
+     'bifpn_num_filters': 384,
+     'bifpn_combine_method': 'sum'})
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class SSDEfficientNetBiFPNFeatureExtractorTest(
+    test_case.TestCase, parameterized.TestCase):
+  """Shape and parameter-count tests for the EfficientNet BiFPN extractor.
+
+  The class-level `parameterized.parameters` decorator supplies one dict per
+  EfficientDet variant; its keys are the keyword arguments of each test method.
+  """
+
+  def _build_conv_hyperparams(self, add_batch_norm=True):
+    """Builds Keras conv hyperparams from an inline text proto.
+
+    Args:
+      add_batch_norm: whether to append a `batch_norm` section to the proto.
+
+    Returns:
+      A `hyperparams_builder.KerasLayerHyperparams` built from the parsed
+      `hyperparams_pb2.Hyperparams` message.
+    """
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      force_use_bias: true
+      activation: SWISH
+      regularizer {
+        l2_regularizer {
+          weight: 0.0004
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+          stddev: 0.03
+          mean: 0.0
+        }
+      }
+    """
+    if add_batch_norm:
+      batch_norm_proto = """
+        batch_norm {
+          scale: true,
+          decay: 0.99,
+          epsilon: 0.001,
+        }
+      """
+      conv_hyperparams_text_proto += batch_norm_proto
+    # Parse (rather than Merge) rejects a non-repeated field that is specified
+    # more than once instead of silently keeping the last value.
+    text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def _create_feature_extractor(self,
+                                efficientnet_version='efficientnet-b0',
+                                bifpn_num_iterations=3,
+                                bifpn_num_filters=64,
+                                bifpn_combine_method='fast_attention'):
+    """Constructs a new EfficientNetBiFPN feature extractor.
+
+    Args:
+      efficientnet_version: backbone name forwarded to the extractor.
+      bifpn_num_iterations: number of BiFPN iterations forwarded as-is.
+      bifpn_num_filters: number of BiFPN filters forwarded as-is.
+      bifpn_combine_method: BiFPN combine method forwarded as-is.
+
+    Returns:
+      An `SSDEfficientNetBiFPNKerasFeatureExtractor` in training mode that uses
+      BiFPN levels 3 through 7 and does not freeze batch norm.
+    """
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    min_depth = 16
+    return (ssd_efficientnet_bifpn_feature_extractor
+            .SSDEfficientNetBiFPNKerasFeatureExtractor(
+                is_training=True,
+                depth_multiplier=depth_multiplier,
+                min_depth=min_depth,
+                pad_to_multiple=pad_to_multiple,
+                conv_hyperparams=self._build_conv_hyperparams(),
+                freeze_batchnorm=False,
+                inplace_batchnorm_update=False,
+                bifpn_min_level=3,
+                bifpn_max_level=7,
+                bifpn_num_iterations=bifpn_num_iterations,
+                bifpn_num_filters=bifpn_num_filters,
+                bifpn_combine_method=bifpn_combine_method,
+                efficientnet_version=efficientnet_version))
+
+  def test_efficientdet_feature_extractor_shapes(self,
+                                                 efficientdet_version,
+                                                 efficientnet_version,
+                                                 bifpn_num_iterations,
+                                                 bifpn_num_filters,
+                                                 bifpn_combine_method):
+    """Checks the five output shapes for a batch of two 256x256 images.
+
+    `efficientdet_version` is not used here; it is accepted because every
+    parameter dict of the class decorator contains that key.
+    """
+    feature_extractor = self._create_feature_extractor(
+        efficientnet_version=efficientnet_version,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method)
+    outputs = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
+
+    # Spatial size halves from one output to the next; channels always equal
+    # bifpn_num_filters.
+    self.assertEqual(outputs[0].shape, (2, 32, 32, bifpn_num_filters))
+    self.assertEqual(outputs[1].shape, (2, 16, 16, bifpn_num_filters))
+    self.assertEqual(outputs[2].shape, (2, 8, 8, bifpn_num_filters))
+    self.assertEqual(outputs[3].shape, (2, 4, 4, bifpn_num_filters))
+    self.assertEqual(outputs[4].shape, (2, 2, 2, bifpn_num_filters))
+
+  def test_efficientdet_feature_extractor_params(self,
+                                                 efficientdet_version,
+                                                 efficientnet_version,
+                                                 bifpn_num_iterations,
+                                                 bifpn_num_filters,
+                                                 bifpn_combine_method):
+    """Checks the trainable parameter count against a per-variant constant."""
+    feature_extractor = self._create_feature_extractor(
+        efficientnet_version=efficientnet_version,
+        bifpn_num_iterations=bifpn_num_iterations,
+        bifpn_num_filters=bifpn_num_filters,
+        bifpn_combine_method=bifpn_combine_method)
+    # Run the extractor once before counting its weights.
+    _ = feature_extractor(np.zeros((2, 256, 256, 3), dtype=np.float32))
+    expected_params = {
+        'efficientdet-d0': 5484829,
+        'efficientdet-d1': 8185156,
+        'efficientdet-d2': 9818153,
+        'efficientdet-d3': 13792706,
+        'efficientdet-d4': 22691445,
+        'efficientdet-d5': 35795677,
+        'efficientdet-d6-d7': 53624512,
+    }
+    num_params = _count_params(feature_extractor)
+    self.assertEqual(expected_params[efficientdet_version], num_params)
+
+
+# Run the tests in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/packages/tf1/setup.py
+++ b/research/object_detection/packages/tf1/setup.py
+"""Setup script for object_detection with TF1.0."""
+import os
+from setuptools import find_packages
+from setuptools import setup
+
+# Distributions passed to `install_requires` below.
+REQUIRED_PACKAGES = ['apache-beam', 'pillow', 'lxml', 'matplotlib', 'Cython',
+                     'contextlib2', 'tf-slim', 'six', 'pycocotools', 'scipy',
+                     'pandas']
+
+setup(
+    name='object_detection',
+    version='0.1',
+    install_requires=REQUIRED_PACKAGES,
+    include_package_data=True,
+    # Ship the `object_detection*` packages found from the current directory
+    # plus every package found under ./slim.
+    packages=(
+        [p for p in find_packages() if p.startswith('object_detection')] +
+        find_packages(where=os.path.join('.', 'slim'))),
+    # Map the slim top-level packages to their directories under slim/.
+    package_dir={
+        'datasets': os.path.join('slim', 'datasets'),
+        'nets': os.path.join('slim', 'nets'),
+        'preprocessing': os.path.join('slim', 'preprocessing'),
+        'deployment': os.path.join('slim', 'deployment'),
+        'scripts': os.path.join('slim', 'scripts'),
+    },
+    description='Tensorflow Object Detection Library with TF1.0',
+    # NOTE(review): '>3.6' is an exclusive bound (it rejects version 3.6
+    # itself); if Python 3.6 is meant to be supported this probably should be
+    # '>=3.6' -- confirm the intent.
+    python_requires='>3.6',
+)
--- a/research/object_detection/packages/tf2/setup.py
+++ b/research/object_detection/packages/tf2/setup.py
+"""Setup script for object_detection with TF2.0."""
+import os
+from setuptools import find_packages
+from setuptools import setup
+
+# Note: adding apache-beam to the required packages causes a conflict with the
+# tf-models-official requirements: the two request incompatible versions of
+# the oauth2client package.
+REQUIRED_PACKAGES = ['pillow', 'lxml', 'matplotlib', 'Cython', 'contextlib2',
+                     'tf-slim', 'six', 'pycocotools', 'scipy', 'pandas',
+                     'tf-models-official']
+
+setup(
+    name='object_detection',
+    version='0.1',
+    install_requires=REQUIRED_PACKAGES,
+    include_package_data=True,
+    # Ship the `object_detection*` packages found from the current directory
+    # plus every package found under ./slim.
+    packages=(
+        [p for p in find_packages() if p.startswith('object_detection')] +
+        find_packages(where=os.path.join('.', 'slim'))),
+    # Map the slim top-level packages to their directories under slim/.
+    package_dir={
+        'datasets': os.path.join('slim', 'datasets'),
+        'nets': os.path.join('slim', 'nets'),
+        'preprocessing': os.path.join('slim', 'preprocessing'),
+        'deployment': os.path.join('slim', 'deployment'),
+        'scripts': os.path.join('slim', 'scripts'),
+    },
+    description='Tensorflow Object Detection Library',
+    # NOTE(review): '>3.6' is an exclusive bound (it rejects version 3.6
+    # itself); if Python 3.6 is meant to be supported this probably should be
+    # '>=3.6' -- confirm the intent.
+    python_requires='>3.6',
+)
--- a/research/object_detection/predictors/heads/head.py
+++ b/research/object_detection/predictors/heads/head.py
@@ -61,7 +61,7 @@ class Head(object):
    pass


-class KerasHead(tf.keras.Model):
+class KerasHead(tf.keras.layers.Layer):
  """Keras head base class."""

  def call(self, features):

--- a/research/object_detection/protos/center_net.proto
+++ b/research/object_detection/protos/center_net.proto
@@ -183,6 +183,41 @@ message CenterNet {
    optional float heatmap_bias_init = 3 [default = -2.19];
  }
  optional MaskEstimation mask_estimation_task = 8;
+
+  // Parameters which are related to the DensePose estimation task.
+  // http://densepose.org/
+  message DensePoseEstimation {
+    // Weight of the task loss. The total loss of the model will be the
+    // summation of the task losses weighted by these weights.
+    optional float task_loss_weight = 1 [default = 1.0];
+
+    // Class ID (0-indexed) that corresponds to the object in the label map that
+    // contains DensePose data.
+    optional int32 class_id = 2;
+
+    // Loss configuration for DensePose heatmap and regression losses. Note
+    // that the localization loss is used for surface coordinate losses and
+    // classification loss is used for part classification losses.
+    optional Loss loss = 3;
+
+    // The number of body parts.
+    optional int32 num_parts = 4 [default = 24];
+
+    // Loss weights for the two DensePose heads.
+    optional float part_loss_weight = 5 [default = 1.0];
+    optional float coordinate_loss_weight = 6 [default = 1.0];
+
+    // Whether to upsample the prediction feature maps back to the original
+    // input dimension prior to applying loss. This has the benefit of
+    // maintaining finer groundtruth location information.
+    optional bool upsample_to_input_res = 7 [default = true];
+
+    // The initial bias value of the convolution kernel of the class heatmap
+    // prediction head. -2.19 corresponds to predicting foreground with
+    // a probability of 0.1.
+    optional float heatmap_bias_init = 8 [default = -2.19];
+  }
+  optional DensePoseEstimation densepose_estimation_task = 9;
 }

 message CenterNetFeatureExtractor {

--- a/research/object_detection/protos/preprocessor.proto
+++ b/research/object_detection/protos/preprocessor.proto
@@ -4,7 +4,7 @@ package object_detection.protos;

 // Message for defining a preprocessing operation on input data.
 // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
-// Next ID: 38
+// Next ID: 39
 message PreprocessingStep {
  oneof preprocessing_step {
    NormalizeImage normalize_image = 1;
@@ -44,6 +44,7 @@ message PreprocessingStep {
    RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
    RandomPatchGaussian random_patch_gaussian = 36;
    RandomSquareCropByScale random_square_crop_by_scale = 37;
+    RandomScaleCropAndPadToSquare random_scale_crop_and_pad_to_square = 38;
  }
 }

@@ -572,3 +573,20 @@ message RandomSquareCropByScale {
  // [min_scale, max_scale]
  optional int32 num_scales = 4 [default=8];
 }
+
+// Randomly scale, crop, and then pad an image to the desired square output
+// dimensions. Specifically, this method first samples a random_scale factor
+// from a uniform distribution between scale_min and scale_max, and then resizes
+// the image such that its maximum dimension is (output_size * random_scale).
+// Secondly, a square output_size crop is extracted from the resized image, and
+// finally the cropped region is padded to the desired square output_size.
+// The augmentation is borrowed from [1].
+// [1]: https://arxiv.org/abs/1911.09070
+message RandomScaleCropAndPadToSquare {
+  // The (square) output image size.
+  optional int32 output_size = 1 [default = 512];
+
+  // The minimum and maximum values from which to sample the random scale.
+  optional float scale_min = 2 [default=0.1];
+  optional float scale_max = 3 [default=2.0];
+}
--- a/research/object_detection/protos/ssd.proto
+++ b/research/object_detection/protos/ssd.proto
@@ -145,7 +145,7 @@ message Ssd {
  optional MaskHead mask_head_config = 25;
 }

-// Next id: 19.
+// Next id: 20.
 message SsdFeatureExtractor {
  reserved 6;

@@ -185,8 +185,13 @@ message SsdFeatureExtractor {
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default = false];

-  // Feature Pyramid Networks config.
-  optional FeaturePyramidNetworks fpn = 10;
+  oneof feature_pyramid_oneof {
+    // Feature Pyramid Networks config.
+    FeaturePyramidNetworks fpn = 10;
+
+    // Bidirectional Feature Pyramid Networks config.
+    BidirectionalFeaturePyramidNetworks bifpn = 19;
+  }

  // If true, replace preprocess function of feature extractor with a
  // placeholder. This should only be used if all the image preprocessing steps
@@ -225,3 +230,23 @@ message FeaturePyramidNetworks {

 }

+// Configuration for Bidirectional Feature Pyramid Networks.
+message BidirectionalFeaturePyramidNetworks {
+  // Minimum level in the feature pyramid.
+  optional int32 min_level = 1 [default = 3];
+
+  // Maximum level in the feature pyramid.
+  optional int32 max_level = 2 [default = 7];
+
+  // The number of repeated top-down bottom-up iterations for BiFPN-based
+  // feature extractors (bidirectional feature pyramid networks).
+  optional int32 num_iterations = 3;
+
+  // The number of filters (channels) to use in feature pyramid layers for
+  // BiFPN-based feature extractors (bidirectional feature pyramid networks).
+  optional int32 num_filters = 4;
+
+  // Method used to combine inputs to BiFPN nodes.
+  optional string combine_method = 5 [default = 'fast_attention'];
+}
+
--- a/research/object_detection/test_images/ducky/test/out1.jpg
+++ b/research/object_detection/test_images/ducky/test/out1.jpg
--- a/research/object_detection/test_images/ducky/test/out10.jpg
+++ b/research/object_detection/test_images/ducky/test/out10.jpg
--- a/research/object_detection/test_images/ducky/test/out11.jpg
+++ b/research/object_detection/test_images/ducky/test/out11.jpg
--- a/research/object_detection/test_images/ducky/test/out12.jpg
+++ b/research/object_detection/test_images/ducky/test/out12.jpg
--- a/research/object_detection/test_images/ducky/test/out13.jpg
+++ b/research/object_detection/test_images/ducky/test/out13.jpg
--- a/research/object_detection/test_images/ducky/test/out14.jpg
+++ b/research/object_detection/test_images/ducky/test/out14.jpg
--- a/research/object_detection/test_images/ducky/test/out15.jpg
+++ b/research/object_detection/test_images/ducky/test/out15.jpg
--- a/research/object_detection/test_images/ducky/test/out16.jpg
+++ b/research/object_detection/test_images/ducky/test/out16.jpg
--- a/research/object_detection/test_images/ducky/test/out17.jpg
+++ b/research/object_detection/test_images/ducky/test/out17.jpg
--- a/research/object_detection/test_images/ducky/test/out18.jpg
+++ b/research/object_detection/test_images/ducky/test/out18.jpg
--- a/research/object_detection/test_images/ducky/test/out19.jpg
+++ b/research/object_detection/test_images/ducky/test/out19.jpg