"docs/source/examples/vscode:/vscode.git/clone" did not exist on "ba78cbdcd0177db9d6ec0947ddf81b5157f3535c"
Commit decbad8a authored by Zhichao Lu's avatar Zhichao Lu Committed by pkulzc
Browse files

Add option to override base feature extractor hyperparams in SSD models. This...

Add option to override base feature extractor hyperparams in SSD models. This would allow us to use the same set of hyperparams for the complete feature extractor (base + new layers) if desired.

PiperOrigin-RevId: 191787921
parent 45ecc0f9
......@@ -17,6 +17,7 @@
import tensorflow as tf
from object_detection.protos import hyperparams_pb2
from object_detection.utils import context_manager
slim = tf.contrib.slim
......@@ -66,16 +67,19 @@ def build(hyperparams_config, is_training):
hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
affected_ops = [slim.fully_connected]
def scope_fn():
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm,
normalizer_params=batch_norm_params) as sc:
return sc
with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
if batch_norm_params is not None else
context_manager.IdentityContextManager()):
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm) as sc:
return sc
return scope_fn
......@@ -170,6 +174,9 @@ def _build_batch_norm_params(batch_norm, is_training):
'center': batch_norm.center,
'scale': batch_norm.scale,
'epsilon': batch_norm.epsilon,
# Remove is_training parameter from here and deprecate it in the proto
# once we refactor Faster RCNN models to set is_training through an outer
# arg_scope in the meta architecture.
'is_training': is_training and batch_norm.train,
}
return batch_norm_params
......@@ -26,11 +26,11 @@ from object_detection.protos import hyperparams_pb2
slim = tf.contrib.slim
class HyperparamsBuilderTest(tf.test.TestCase):
def _get_scope_key(op):
return getattr(op, '_key_op', str(op))
# TODO(rathodv): Make this a public api in slim arg_scope.py.
def _get_scope_key(self, op):
return getattr(op, '_key_op', str(op))
class HyperparamsBuilderTest(tf.test.TestCase):
def test_default_arg_scope_has_conv2d_op(self):
conv_hyperparams_text_proto = """
......@@ -48,7 +48,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.conv2d) in scope)
self.assertTrue(_get_scope_key(slim.conv2d) in scope)
def test_default_arg_scope_has_separable_conv2d_op(self):
conv_hyperparams_text_proto = """
......@@ -66,7 +66,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope)
self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope)
def test_default_arg_scope_has_conv2d_transpose_op(self):
conv_hyperparams_text_proto = """
......@@ -84,7 +84,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope)
def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
conv_hyperparams_text_proto = """
......@@ -103,7 +103,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
self.assertTrue(_get_scope_key(slim.fully_connected) in scope)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
conv_hyperparams_text_proto = """
......@@ -166,7 +166,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
......@@ -197,9 +197,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -229,9 +229,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=False)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -261,9 +261,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -286,9 +286,8 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
self.assertEqual(conv_scope_arguments['normalizer_params'], None)
def test_use_none_activation(self):
conv_hyperparams_text_proto = """
......@@ -307,7 +306,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], None)
def test_use_relu_activation(self):
......@@ -327,7 +326,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
def test_use_relu_6_activation(self):
......@@ -347,7 +346,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def _assert_variance_in_range(self, initializer, shape, variance,
......@@ -382,7 +381,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
......@@ -406,7 +405,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 40.)
......@@ -430,7 +429,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=4. / (100. + 40.))
......@@ -454,7 +453,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
......@@ -477,7 +476,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.49, tol=1e-1)
......@@ -500,7 +499,7 @@ class HyperparamsBuilderTest(tf.test.TestCase):
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.64, tol=1e-1)
......
......@@ -120,6 +120,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
use_depthwise = feature_extractor_config.use_depthwise
conv_hyperparams = hyperparams_builder.build(
feature_extractor_config.conv_hyperparams, is_training)
override_base_feature_extractor_hyperparams = (
feature_extractor_config.override_base_feature_extractor_hyperparams)
if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
......@@ -127,7 +129,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def _build_ssd_model(ssd_config, is_training, add_summaries):
......
......@@ -83,6 +83,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -154,6 +155,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......
......@@ -45,7 +45,8 @@ class SSDFeatureExtractor(object):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""Constructor.
Args:
......@@ -55,12 +56,15 @@ class SSDFeatureExtractor(object):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
......@@ -70,6 +74,8 @@ class SSDFeatureExtractor(object):
self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise
self._override_base_feature_extractor_hyperparams = (
override_base_feature_extractor_hyperparams)
@abstractmethod
def preprocess(self, resized_inputs):
......
......@@ -17,16 +17,16 @@
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models import ssd_mobilenet_v1_feature_extractor
from object_detection.utils import context_manager
from object_detection.utils import ops
from nets import mobilenet_v1
slim = tf.contrib.slim
class EmbeddedSSDMobileNetV1FeatureExtractor(
ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""Embedded-friendly SSD Feature Extractor using MobilenetV1 features.
This feature extractor is similar to SSD MobileNetV1 feature extractor, and
......@@ -52,7 +52,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
......@@ -62,11 +63,15 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. For EmbeddedSSD it must be set to 1.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
......@@ -77,7 +82,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
......@@ -119,16 +140,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
'use_depthwise': self._use_depthwise,
}
with slim.arg_scope(self._conv_hyperparams_fn()):
with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams
else context_manager.IdentityContextManager()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
......
......@@ -41,7 +41,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
return (embedded_ssd_mobilenet_v1_feature_extractor.
EmbeddedSSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn))
self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True))
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
......
......@@ -36,7 +36,8 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
......@@ -46,15 +47,29 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V2 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......
......@@ -40,7 +40,8 @@ class SsdInceptionV2FeatureExtractorTest(
min_depth = 32
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn)
self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
......
......@@ -36,7 +36,8 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""InceptionV3 Feature Extractor for SSD Models.
Args:
......@@ -46,15 +47,30 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
"""
super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V3 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......
......@@ -40,7 +40,8 @@ class SsdInceptionV3FeatureExtractorTest(
min_depth = 32
return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn)
self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
......
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1
......@@ -37,7 +38,8 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
......@@ -47,16 +49,21 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -99,15 +106,18 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
reuse=self._reuse_weights) as scope:
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams
else context_manager.IdentityContextManager()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
......
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets.mobilenet import mobilenet
......@@ -38,7 +39,8 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found
......@@ -51,15 +53,20 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
......@@ -102,15 +109,18 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
slim.arg_scope(
[mobilenet.depth_multiplier], min_depth=self._min_depth):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v2.mobilenet_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='layer_19',
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v2.mobilenet_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='layer_19',
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(b/68150321): Enable fused batch norm once quantization
# supports it.
......
......@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
......@@ -42,7 +43,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_scope_name,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD FPN feature extractor based on Resnet v1 architecture.
Args:
......@@ -53,7 +55,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
resnet_base_fn: base resnet network to use.
resnet_scope_name: scope name under which to construct resnet
fpn_scope_name: scope name under which to construct the feature pyramid
......@@ -62,13 +65,17 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
Raises:
ValueError: On supplying invalid arguments for unused arguments.
"""
super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams_fn, reuse_weights, use_explicit_padding)
conv_hyperparams_fn, reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
......@@ -128,15 +135,18 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope(
self._resnet_scope_name, reuse=self._reuse_weights) as scope:
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=None,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams else
context_manager.IdentityContextManager()):
_, image_features = self._resnet_base_fn(
inputs=ops.pad_to_multiple(preprocessed_inputs,
self._pad_to_multiple),
num_classes=None,
is_training=None,
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
scope=scope)
image_features = self._filter_features(image_features)
last_feature_map = image_features['block4']
with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
......@@ -167,28 +177,36 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet50 v1 FPN Feature Extractor for SSD Models.
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet50 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
reuse_weights, use_explicit_padding)
conv_hyperparams_fn, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -198,28 +216,36 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet101 v1 FPN Feature Extractor for SSD Models.
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet101 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
reuse_weights, use_explicit_padding)
conv_hyperparams_fn, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
......@@ -229,25 +255,33 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""Resnet152 v1 FPN Feature Extractor for SSD Models.
use_depthwise=False,
override_base_feature_extractor_hyperparams=False):
"""SSD Resnet152 V1 FPN feature extractor based on Resnet v1 architecture.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops in the layers that are added on top of the
base feature extractor.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams_fn`.
"""
super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
reuse_weights, use_explicit_padding)
conv_hyperparams_fn, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
reuse_weights, use_explicit_padding,
override_base_feature_extractor_hyperparams)
......@@ -99,6 +99,14 @@ message SsdFeatureExtractor {
// of the base feature extractor.
optional Hyperparams conv_hyperparams = 4;
// Normally, SSD feature extractors are constructed by reusing an existing
// base feature extractor (that has its own hyperparams) and adding new layers
// on top of it. `conv_hyperparams` above normally applies only to the new
// layers while base feature extractor uses its own default hyperparams. If
// this value is set to true, the base feature extractor's hyperparams will be
// overridden with the `conv_hyperparams`.
optional bool override_base_feature_extractor_hyperparams = 9 [default = false];
// The nearest multiple to zero-pad the input height and width dimensions to.
// For example, if pad_to_multiple = 2, input dimensions are zero-padded
// until the resulting dimensions are even.
......
......@@ -98,6 +98,7 @@ model {
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
......
......@@ -98,6 +98,7 @@ model {
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
......
......@@ -98,6 +98,7 @@ model {
epsilon: 0.01,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment