Unverified Commit ed4e22b8 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Merge pull request #3973 from pkulzc/master

Object detection internal changes
parents cac90a0e 13b89b93
...@@ -42,12 +42,11 @@ class SSDFeatureExtractor(object): ...@@ -42,12 +42,11 @@ class SSDFeatureExtractor(object):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""Constructor. """Constructor.
Args: Args:
...@@ -56,30 +55,27 @@ class SSDFeatureExtractor(object): ...@@ -56,30 +55,27 @@ class SSDFeatureExtractor(object):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: whether to reuse variables. Default is None. reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch norm moving average override_base_feature_extractor_hyperparams: Whether to override
values inplace. When this is false train op must add a control hyperparameters of the base feature extractor with the one from
dependency on tf.graphkeys.UPDATE_OPS collection in order to update `conv_hyperparams_fn`.
batch norm statistics.
""" """
self._is_training = is_training self._is_training = is_training
self._depth_multiplier = depth_multiplier self._depth_multiplier = depth_multiplier
self._min_depth = min_depth self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams self._conv_hyperparams_fn = conv_hyperparams_fn
self._batch_norm_trainable = batch_norm_trainable
self._inplace_batchnorm_update = inplace_batchnorm_update
self._reuse_weights = reuse_weights self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise self._use_depthwise = use_depthwise
self._override_base_feature_extractor_hyperparams = (
override_base_feature_extractor_hyperparams)
@abstractmethod @abstractmethod
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
...@@ -106,28 +102,6 @@ class SSDFeatureExtractor(object): ...@@ -106,28 +102,6 @@ class SSDFeatureExtractor(object):
This function is responsible for extracting feature maps from preprocessed This function is responsible for extracting feature maps from preprocessed
images. images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
updates_collections=batchnorm_updates_collections):
return self._extract_features(preprocessed_inputs)
@abstractmethod
def _extract_features(self, preprocessed_inputs):
"""Extracts features from preprocessed inputs.
This function is responsible for extracting feature maps from preprocessed
images.
Args: Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images. representing a batch of images.
...@@ -162,7 +136,10 @@ class SSDMetaArch(model.DetectionModel): ...@@ -162,7 +136,10 @@ class SSDMetaArch(model.DetectionModel):
normalize_loss_by_num_matches, normalize_loss_by_num_matches,
hard_example_miner, hard_example_miner,
add_summaries=True, add_summaries=True,
normalize_loc_loss_by_codesize=False): normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=True):
"""SSDMetaArch Constructor. """SSDMetaArch Constructor.
TODO(rathodv,jonathanhuang): group NMS parameters + score converter into TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
...@@ -209,9 +186,23 @@ class SSDMetaArch(model.DetectionModel): ...@@ -209,9 +186,23 @@ class SSDMetaArch(model.DetectionModel):
should be added to tensorflow graph. should be added to tensorflow graph.
normalize_loc_loss_by_codesize: whether to normalize localization loss normalize_loc_loss_by_codesize: whether to normalize localization loss
by code size of the box encoder. by code size of the box encoder.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
add_background_class: Whether to add an implicit background class to
one-hot encodings of groundtruth labels. Set to false if using
groundtruth labels with an explicit background class or using multiclass
scores instead of truth in the case of distillation.
""" """
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes) super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training self._is_training = is_training
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
# Needed for fine-tuning from classification checkpoints whose # Needed for fine-tuning from classification checkpoints whose
# variables do not have the feature extractor scope. # variables do not have the feature extractor scope.
...@@ -224,6 +215,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -224,6 +215,7 @@ class SSDMetaArch(model.DetectionModel):
self._feature_extractor = feature_extractor self._feature_extractor = feature_extractor
self._matcher = matcher self._matcher = matcher
self._region_similarity_calculator = region_similarity_calculator self._region_similarity_calculator = region_similarity_calculator
self._add_background_class = add_background_class
# TODO(jonathanhuang): handle agnostic mode # TODO(jonathanhuang): handle agnostic mode
# weights # weights
...@@ -255,6 +247,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -255,6 +247,7 @@ class SSDMetaArch(model.DetectionModel):
self._anchors = None self._anchors = None
self._add_summaries = add_summaries self._add_summaries = add_summaries
self._batched_prediction_tensor_names = []
@property @property
def anchors(self): def anchors(self):
...@@ -264,6 +257,13 @@ class SSDMetaArch(model.DetectionModel): ...@@ -264,6 +257,13 @@ class SSDMetaArch(model.DetectionModel):
raise RuntimeError('anchors should be a BoxList object, but is not.') raise RuntimeError('anchors should be a BoxList object, but is not.')
return self._anchors return self._anchors
@property
def batched_prediction_tensor_names(self):
  """Names of the batched tensors produced by the last `predict()` call.

  The list is populated by `predict()` from the keys of its returned
  predictions dictionary, excluding 'anchors' (which is not batched).

  Returns:
    A list of string tensor names.

  Raises:
    RuntimeError: if `predict()` has not been called yet, i.e. the name
      list is still empty.
  """
  if not self._batched_prediction_tensor_names:
    raise RuntimeError('Must call predict() method to get batched prediction '
                       'tensor names.')
  return self._batched_prediction_tensor_names
def preprocess(self, inputs): def preprocess(self, inputs):
"""Feature-extractor specific preprocessing. """Feature-extractor specific preprocessing.
...@@ -372,32 +372,42 @@ class SSDMetaArch(model.DetectionModel): ...@@ -372,32 +372,42 @@ class SSDMetaArch(model.DetectionModel):
5) anchors: 2-D float tensor of shape [num_anchors, 4] containing 5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
the generated anchors in normalized coordinates. the generated anchors in normalized coordinates.
""" """
with tf.variable_scope(None, self._extract_features_scope, batchnorm_updates_collections = (None if self._inplace_batchnorm_update
[preprocessed_inputs]): else tf.GraphKeys.UPDATE_OPS)
feature_maps = self._feature_extractor.extract_features( with slim.arg_scope([slim.batch_norm],
is_training=(self._is_training and
not self._freeze_batchnorm),
updates_collections=batchnorm_updates_collections):
with tf.variable_scope(None, self._extract_features_scope,
[preprocessed_inputs]):
feature_maps = self._feature_extractor.extract_features(
preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(
feature_maps)
image_shape = shape_utils.combined_static_and_dynamic_shape(
preprocessed_inputs) preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps) self._anchors = box_list_ops.concatenate(
image_shape = shape_utils.combined_static_and_dynamic_shape( self._anchor_generator.generate(
preprocessed_inputs) feature_map_spatial_dims,
self._anchors = box_list_ops.concatenate( im_height=image_shape[1],
self._anchor_generator.generate( im_width=image_shape[2]))
feature_map_spatial_dims, prediction_dict = self._box_predictor.predict(
im_height=image_shape[1], feature_maps, self._anchor_generator.num_anchors_per_location())
im_width=image_shape[2])) box_encodings = tf.squeeze(
prediction_dict = self._box_predictor.predict( tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
feature_maps, self._anchor_generator.num_anchors_per_location()) class_predictions_with_background = tf.concat(
box_encodings = tf.squeeze( prediction_dict['class_predictions_with_background'], axis=1)
tf.concat(prediction_dict['box_encodings'], axis=1), axis=2) predictions_dict = {
class_predictions_with_background = tf.concat( 'preprocessed_inputs': preprocessed_inputs,
prediction_dict['class_predictions_with_background'], axis=1) 'box_encodings': box_encodings,
predictions_dict = { 'class_predictions_with_background':
'preprocessed_inputs': preprocessed_inputs, class_predictions_with_background,
'box_encodings': box_encodings, 'feature_maps': feature_maps,
'class_predictions_with_background': class_predictions_with_background, 'anchors': self._anchors.get()
'feature_maps': feature_maps, }
'anchors': self._anchors.get() self._batched_prediction_tensor_names = [x for x in predictions_dict
} if x != 'anchors']
return predictions_dict return predictions_dict
def _get_feature_map_spatial_dims(self, feature_maps): def _get_feature_map_spatial_dims(self, feature_maps):
"""Return list of spatial dimensions for each feature map in a list. """Return list of spatial dimensions for each feature map in a list.
...@@ -578,8 +588,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -578,8 +588,8 @@ class SSDMetaArch(model.DetectionModel):
name='classification_loss') name='classification_loss')
loss_dict = { loss_dict = {
localization_loss.op.name: localization_loss, str(localization_loss.op.name): localization_loss,
classification_loss.op.name: classification_loss str(classification_loss.op.name): classification_loss
} }
return loss_dict return loss_dict
...@@ -632,10 +642,14 @@ class SSDMetaArch(model.DetectionModel): ...@@ -632,10 +642,14 @@ class SSDMetaArch(model.DetectionModel):
groundtruth_boxlists = [ groundtruth_boxlists = [
box_list.BoxList(boxes) for boxes in groundtruth_boxes_list box_list.BoxList(boxes) for boxes in groundtruth_boxes_list
] ]
groundtruth_classes_with_background_list = [ if self._add_background_class:
tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT') groundtruth_classes_with_background_list = [
for one_hot_encoding in groundtruth_classes_list tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
] for one_hot_encoding in groundtruth_classes_list
]
else:
groundtruth_classes_with_background_list = groundtruth_classes_list
if groundtruth_keypoints_list is not None: if groundtruth_keypoints_list is not None:
for boxlist, keypoints in zip( for boxlist, keypoints in zip(
groundtruth_boxlists, groundtruth_keypoints_list): groundtruth_boxlists, groundtruth_keypoints_list):
......
...@@ -38,8 +38,7 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -38,8 +38,7 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=0, depth_multiplier=0,
min_depth=0, min_depth=0,
pad_to_multiple=1, pad_to_multiple=1,
batch_norm_trainable=True, conv_hyperparams_fn=None)
conv_hyperparams=None)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
return tf.identity(resized_inputs) return tf.identity(resized_inputs)
...@@ -81,8 +80,10 @@ def _get_value_for_matching_key(dictionary, suffix): ...@@ -81,8 +80,10 @@ def _get_value_for_matching_key(dictionary, suffix):
class SsdMetaArchTest(test_case.TestCase): class SsdMetaArchTest(test_case.TestCase):
def _create_model(self, apply_hard_mining=True, def _create_model(self,
normalize_loc_loss_by_codesize=False): apply_hard_mining=True,
normalize_loc_loss_by_codesize=False,
add_background_class=True):
is_training = False is_training = False
num_classes = 1 num_classes = 1
mock_anchor_generator = MockAnchorGenerator2x2() mock_anchor_generator = MockAnchorGenerator2x2()
...@@ -118,13 +119,29 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -118,13 +119,29 @@ class SsdMetaArchTest(test_case.TestCase):
code_size = 4 code_size = 4
model = ssd_meta_arch.SSDMetaArch( model = ssd_meta_arch.SSDMetaArch(
is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder, is_training,
fake_feature_extractor, mock_matcher, region_similarity_calculator, mock_anchor_generator,
encode_background_as_zeros, negative_class_weight, image_resizer_fn, mock_box_predictor,
non_max_suppression_fn, tf.identity, classification_loss, mock_box_coder,
localization_loss, classification_loss_weight, localization_loss_weight, fake_feature_extractor,
normalize_loss_by_num_matches, hard_example_miner, add_summaries=False, mock_matcher,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize) region_similarity_calculator,
encode_background_as_zeros,
negative_class_weight,
image_resizer_fn,
non_max_suppression_fn,
tf.identity,
classification_loss,
localization_loss,
classification_loss_weight,
localization_loss_weight,
normalize_loss_by_num_matches,
hard_example_miner,
add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False,
inplace_batchnorm_update=False,
add_background_class=add_background_class)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def test_preprocess_preserves_shapes_with_dynamic_input_image(self): def test_preprocess_preserves_shapes_with_dynamic_input_image(self):
...@@ -365,6 +382,43 @@ class SsdMetaArchTest(test_case.TestCase): ...@@ -365,6 +382,43 @@ class SsdMetaArchTest(test_case.TestCase):
self.assertAllClose(localization_loss, expected_localization_loss) self.assertAllClose(localization_loss, expected_localization_loss)
self.assertAllClose(classification_loss, expected_classification_loss) self.assertAllClose(classification_loss, expected_classification_loss)
def test_loss_results_are_correct_without_add_background_class(self):
  """Checks SSD losses when no implicit background class is added.

  With add_background_class=False the groundtruth one-hot vectors are used
  as-is (num_classes + 1 columns here), so the model's class predictions
  are scored directly against them.
  """
  # Build a throwaway model in a scratch graph just to read back the
  # anchor/class counts used in the expected-loss arithmetic below.
  with tf.Graph().as_default():
    _, num_classes, num_anchors, _ = self._create_model(
        add_background_class=False)

  def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
               groundtruth_classes1, groundtruth_classes2):
    # Hard-example mining is disabled so every anchor contributes to the
    # classification loss, making the expected value below exact.
    groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2]
    groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2]
    model, _, _, _ = self._create_model(
        apply_hard_mining=False, add_background_class=False)
    model.provide_groundtruth(groundtruth_boxes_list,
                              groundtruth_classes_list)
    prediction_dict = model.predict(
        preprocessed_tensor, true_image_shapes=None)
    loss_dict = model.loss(prediction_dict, true_image_shapes=None)
    return (loss_dict['Loss/localization_loss'],
            loss_dict['Loss/classification_loss'])

  batch_size = 2
  preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
  groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
  groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
  # Explicit one-hot labels including the background column (no implicit
  # background is prepended when add_background_class=False).
  groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
  groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
  expected_localization_loss = 0.0
  # Each of the (num_classes + 1) logits contributes log(2) of sigmoid
  # cross-entropy per anchor when predictions are uniform — TODO confirm
  # this matches the mock predictor's output used by _create_model.
  expected_classification_loss = (
      batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
  (localization_loss, classification_loss) = self.execute(
      graph_fn, [
          preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
          groundtruth_classes1, groundtruth_classes2
      ])
  self.assertAllClose(localization_loss, expected_localization_loss)
  self.assertAllClose(classification_loss, expected_classification_loss)
def test_restore_map_for_detection_ckpt(self): def test_restore_map_for_detection_ckpt(self):
model, _, _, _ = self._create_model() model, _, _, _ = self._create_model()
model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]], model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]],
......
...@@ -12,13 +12,7 @@ ...@@ -12,13 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
r"""Creates and runs `Experiment` for object detection model. r"""Constructs model, inputs, and training environment."""
This uses the TF.learn framework to define and run an object detection model
wrapped in an `Estimator`.
Note that this module is only compatible with SSD Meta architecture at the
moment.
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -29,13 +23,8 @@ import os ...@@ -29,13 +23,8 @@ import os
import tensorflow as tf import tensorflow as tf
from google.protobuf import text_format
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from tensorflow.python.lib.io import file_io
from object_detection import eval_util from object_detection import eval_util
from object_detection import inputs from object_detection import inputs
from object_detection import model_hparams
from object_detection.builders import model_builder from object_detection.builders import model_builder
from object_detection.builders import optimizer_builder from object_detection.builders import optimizer_builder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
...@@ -45,15 +34,6 @@ from object_detection.utils import shape_utils ...@@ -45,15 +34,6 @@ from object_detection.utils import shape_utils
from object_detection.utils import variables_helper from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vis_utils from object_detection.utils import visualization_utils as vis_utils
tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
tf.flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', None, 'Number of train steps.')
FLAGS = tf.flags.FLAGS
# A map of names to methods that help build the model. # A map of names to methods that help build the model.
MODEL_BUILD_UTIL_MAP = { MODEL_BUILD_UTIL_MAP = {
'get_configs_from_pipeline_file': 'get_configs_from_pipeline_file':
...@@ -297,17 +277,20 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -297,17 +277,20 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
regularization_loss = tf.add_n(regularization_losses, regularization_loss = tf.add_n(regularization_losses,
name='regularization_loss') name='regularization_loss')
losses.append(regularization_loss) losses.append(regularization_loss)
if not use_tpu: losses_dict['Loss/regularization_loss'] = regularization_loss
tf.summary.scalar('regularization_loss', regularization_loss)
total_loss = tf.add_n(losses, name='total_loss') total_loss = tf.add_n(losses, name='total_loss')
losses_dict['Loss/total_loss'] = total_loss
if mode == tf.estimator.ModeKeys.TRAIN: if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
# TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
# can write learning rate summaries on TPU without host calls.
global_step = tf.train.get_or_create_global_step() global_step = tf.train.get_or_create_global_step()
training_optimizer, optimizer_summary_vars = optimizer_builder.build( training_optimizer, optimizer_summary_vars = optimizer_builder.build(
train_config.optimizer) train_config.optimizer)
if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu: if use_tpu:
training_optimizer = tpu_optimizer.CrossShardOptimizer( training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
training_optimizer) training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero. # Optionally freeze some layers by setting their gradients to be zero.
...@@ -380,9 +363,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -380,9 +363,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict, eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False) include_metrics_per_category=False)
for loss_key, loss_tensor in iter(losses_dict.items()):
eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
for var in optimizer_summary_vars:
eval_metric_ops[var.op.name] = (var, tf.no_op())
if img_summary is not None: if img_summary is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = ( eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
img_summary, tf.no_op()) img_summary, tf.no_op())
eval_metric_ops = {str(k): v for k, v in eval_metric_ops.iteritems()}
if use_tpu: if use_tpu:
return tf.contrib.tpu.TPUEstimatorSpec( return tf.contrib.tpu.TPUEstimatorSpec(
...@@ -405,33 +393,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False): ...@@ -405,33 +393,18 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
return model_fn return model_fn
def build_experiment_fn(train_steps, eval_steps): def create_estimator_and_inputs(run_config,
"""Returns a function that creates an `Experiment`.""" hparams,
pipeline_config_path,
def build_experiment(run_config, hparams): train_steps=None,
"""Builds an `Experiment` from configuration and hyperparameters. eval_steps=None,
model_fn_creator=create_model_fn,
Args: use_tpu_estimator=False,
run_config: A `RunConfig`. use_tpu=False,
hparams: A `HParams`. num_shards=1,
params=None,
Returns: **kwargs):
An `Experiment` object. """Creates `Estimator`, input functions, and steps.
"""
return populate_experiment(run_config, hparams, FLAGS.pipeline_config_path,
train_steps, eval_steps)
return build_experiment
def populate_experiment(run_config,
hparams,
pipeline_config_path,
train_steps=None,
eval_steps=None,
model_fn_creator=create_model_fn,
**kwargs):
"""Populates an `Experiment` object.
Args: Args:
run_config: A `RunConfig`. run_config: A `RunConfig`.
...@@ -451,18 +424,34 @@ def populate_experiment(run_config, ...@@ -451,18 +424,34 @@ def populate_experiment(run_config,
* Returns: * Returns:
`model_fn` for `Estimator`. `model_fn` for `Estimator`.
use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False,
an `Estimator` will be returned.
use_tpu: Boolean, whether training and evaluation should run on TPU. Only
used if `use_tpu_estimator` is True.
num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator`
is True.
params: Parameter dictionary passed from the estimator. Only used if
`use_tpu_estimator` is True.
**kwargs: Additional keyword arguments for configuration override. **kwargs: Additional keyword arguments for configuration override.
Returns: Returns:
An `Experiment` that defines all aspects of training, evaluation, and A dictionary with the following fields:
export. 'estimator': An `Estimator` or `TPUEstimator`.
'train_input_fn': A training input function.
'eval_input_fn': An evaluation input function.
'eval_on_train_input_fn': An evaluation-on-train input function.
'predict_input_fn': A prediction input function.
'train_steps': Number of training steps. Either directly from input or from
configuration.
'eval_steps': Number of evaluation steps. Either directly from input or from
configuration.
""" """
get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
'get_configs_from_pipeline_file'] 'get_configs_from_pipeline_file']
create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
'create_pipeline_proto_from_configs']
merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
'merge_external_params_with_configs'] 'merge_external_params_with_configs']
create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
'create_pipeline_proto_from_configs']
create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn'] create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn']
create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn'] create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn'] create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
...@@ -480,16 +469,16 @@ def populate_experiment(run_config, ...@@ -480,16 +469,16 @@ def populate_experiment(run_config,
eval_config = configs['eval_config'] eval_config = configs['eval_config']
eval_input_config = configs['eval_input_config'] eval_input_config = configs['eval_input_config']
if train_steps is None and train_config.num_steps: if train_steps is None:
train_steps = train_config.num_steps train_steps = configs['train_config'].num_steps
if eval_steps is None and eval_config.num_examples: if eval_steps is None:
eval_steps = eval_config.num_examples eval_steps = configs['eval_config'].num_examples
detection_model_fn = functools.partial( detection_model_fn = functools.partial(
model_builder.build, model_config=model_config) model_builder.build, model_config=model_config)
# Create the input functions for TRAIN/EVAL. # Create the input functions for TRAIN/EVAL/PREDICT.
train_input_fn = create_train_input_fn( train_input_fn = create_train_input_fn(
train_config=train_config, train_config=train_config,
train_input_config=train_input_config, train_input_config=train_input_config,
...@@ -498,51 +487,200 @@ def populate_experiment(run_config, ...@@ -498,51 +487,200 @@ def populate_experiment(run_config,
eval_config=eval_config, eval_config=eval_config,
eval_input_config=eval_input_config, eval_input_config=eval_input_config,
model_config=model_config) model_config=model_config)
eval_on_train_input_fn = create_eval_input_fn(
eval_config=eval_config,
eval_input_config=train_input_config,
model_config=model_config)
predict_input_fn = create_predict_input_fn(model_config=model_config)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu)
if use_tpu_estimator:
estimator = tf.contrib.tpu.TPUEstimator(
model_fn=model_fn,
train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval.
eval_batch_size=num_shards * 1 if use_tpu else 1,
use_tpu=use_tpu,
config=run_config,
params=params if params else {})
else:
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
export_strategies = [ # Write the as-run pipeline config to disk.
tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
serving_input_fn=create_predict_input_fn(
model_config=model_config))
]
estimator = tf.estimator.Estimator(
model_fn=model_fn_creator(detection_model_fn, configs, hparams),
config=run_config)
if run_config.is_chief: if run_config.is_chief:
# Store the final pipeline config for traceability.
pipeline_config_final = create_pipeline_proto_from_configs( pipeline_config_final = create_pipeline_proto_from_configs(
configs) configs)
if not file_io.file_exists(estimator.model_dir): config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
file_io.recursive_create_dir(estimator.model_dir)
pipeline_config_final_path = os.path.join(estimator.model_dir,
'pipeline.config')
config_text = text_format.MessageToString(pipeline_config_final)
with tf.gfile.Open(pipeline_config_final_path, 'wb') as f:
tf.logging.info('Writing as-run pipeline config file to %s',
pipeline_config_final_path)
f.write(config_text)
return tf.contrib.learn.Experiment( return dict(
estimator=estimator, estimator=estimator,
train_input_fn=train_input_fn, train_input_fn=train_input_fn,
eval_input_fn=eval_input_fn, eval_input_fn=eval_input_fn,
eval_on_train_input_fn=eval_on_train_input_fn,
predict_input_fn=predict_input_fn,
train_steps=train_steps, train_steps=train_steps,
eval_steps=eval_steps, eval_steps=eval_steps)
export_strategies=export_strategies,
eval_delay_secs=120,)
def create_train_and_eval_specs(train_input_fn,
                                eval_input_fn,
                                eval_on_train_input_fn,
                                predict_input_fn,
                                train_steps,
                                eval_steps,
                                eval_on_train_data=False,
                                final_exporter_name='Servo',
                                eval_spec_name='eval'):
  """Builds the `TrainSpec` and `EvalSpec`s for `tf.estimator.train_and_evaluate`.

  Args:
    train_input_fn: Function that produces features and labels on train data.
    eval_input_fn: Function that produces features and labels on eval data.
    eval_on_train_input_fn: Function that produces features and labels for
      evaluation on train data.
    predict_input_fn: Function that produces features for inference.
    train_steps: Number of training steps.
    eval_steps: Number of eval steps.
    eval_on_train_data: Whether to also evaluate the model on training data.
      Default is False.
    final_exporter_name: String name given to `FinalExporter`.
    eval_spec_name: String name given to the main `EvalSpec`.

  Returns:
    Tuple of `TrainSpec` and list of `EvalSpec`s. The first `EvalSpec` covers
    the evaluation data; when `eval_on_train_data` is True a second `EvalSpec`
    named 'eval_on_train' covering the training data is appended.
  """
  train_spec = tf.estimator.TrainSpec(
      input_fn=train_input_fn, max_steps=train_steps)

  # The final exporter is attached only to the main eval spec so a single
  # SavedModel is exported at the end of training.
  final_exporter = tf.estimator.FinalExporter(
      name=final_exporter_name,
      serving_input_receiver_fn=predict_input_fn)
  eval_specs = [
      tf.estimator.EvalSpec(
          name=eval_spec_name,
          input_fn=eval_input_fn,
          steps=eval_steps,
          exporters=final_exporter)
  ]

  if eval_on_train_data:
    eval_specs.append(
        tf.estimator.EvalSpec(
            name='eval_on_train',
            input_fn=eval_on_train_input_fn,
            steps=eval_steps))

  return train_spec, eval_specs
def continuous_eval(estimator, model_dir, input_fn, eval_steps, train_steps,
                    name):
  """Perform continuous evaluation on checkpoints written to a model directory.

  Blocks on new checkpoints appearing in `model_dir`, evaluates each one, and
  stops once a checkpoint at or beyond `train_steps` has been evaluated.

  Args:
    estimator: Estimator object to use for evaluation.
    model_dir: Model directory to read checkpoints for continuous evaluation.
    input_fn: Input function to use for evaluation.
    eval_steps: Number of steps to run during each evaluation.
    train_steps: Number of training steps. This is used to infer the last
      checkpoint and stop evaluation loop.
    name: Namescope for eval summary.
  """

  def terminate_eval():
    # NOTE(review): with timeout=None below this callback is never invoked;
    # kept so a finite timeout can be wired in without other changes.
    tf.logging.info('Terminating eval after 180 seconds of no checkpoints')
    return True

  for ckpt in tf.contrib.training.checkpoints_iterator(
      model_dir, min_interval_secs=180, timeout=None,
      timeout_fn=terminate_eval):

    tf.logging.info('Starting Evaluation.')
    try:
      eval_results = estimator.evaluate(
          input_fn=input_fn,
          steps=eval_steps,
          checkpoint_path=ckpt,
          name=name)
      tf.logging.info('Eval results: %s' % eval_results)

      # Terminate eval job when final checkpoint is reached.
      # Checkpoint basenames are assumed to end in '-<global_step>' —
      # TODO confirm against the checkpoint naming used by the trainer.
      current_step = int(os.path.basename(ckpt).split('-')[1])
      if current_step >= train_steps:
        tf.logging.info(
            'Evaluation finished after training step %d' % current_step)
        break

    except tf.errors.NotFoundError:
      # Checkpoints can be garbage-collected between listing and evaluation;
      # skip rather than fail the eval job.
      tf.logging.info(
          'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
def populate_experiment(run_config,
                        hparams,
                        pipeline_config_path,
                        train_steps=None,
                        eval_steps=None,
                        model_fn_creator=create_model_fn,
                        **kwargs):
  """Populates an `Experiment` object.

  EXPERIMENT CLASS IS DEPRECATED. Please switch to
  tf.estimator.train_and_evaluate. As an example, see model_main.py.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training
      steps is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel` instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    An `Experiment` that defines all aspects of training, evaluation, and
    export.
  """
  tf.logging.warning('Experiment is being deprecated. Please use '
                     'tf.estimator.train_and_evaluate(). See model_main.py for '
                     'an example.')
  # Reuse the train_and_evaluate construction path, then repackage its pieces
  # into the legacy Experiment container.
  train_and_eval_dict = create_estimator_and_inputs(
      run_config,
      hparams,
      pipeline_config_path,
      train_steps=train_steps,
      eval_steps=eval_steps,
      model_fn_creator=model_fn_creator,
      **kwargs)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fn = train_and_eval_dict['eval_input_fn']
  predict_input_fn = train_and_eval_dict['predict_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  eval_steps = train_and_eval_dict['eval_steps']

  export_strategies = [
      tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
          serving_input_fn=predict_input_fn)
  ]

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      export_strategies=export_strategies,
      eval_delay_secs=120,)
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Tests for object detection model.""" """Tests for object detection model library."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -24,16 +24,20 @@ import os ...@@ -24,16 +24,20 @@ import os
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from object_detection import inputs from object_detection import inputs
from object_detection import model
from object_detection import model_hparams from object_detection import model_hparams
from object_detection import model_test_util from object_detection import model_lib
from object_detection.builders import model_builder from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import config_util from object_detection.utils import config_util
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME # Model for test. Options are:
# 'ssd_inception_v2_pets', 'faster_rcnn_resnet50_pets'
MODEL_NAME_FOR_TEST = 'ssd_inception_v2_pets'
def _get_data_path(): def _get_data_path():
...@@ -42,6 +46,12 @@ def _get_data_path(): ...@@ -42,6 +46,12 @@ def _get_data_path():
'pets_examples.record') 'pets_examples.record')
def get_pipeline_config_path(model_name):
  """Returns path to the local pipeline config file."""
  data_dir = tf.resource_loader.get_data_files_path()
  config_basename = model_name + '.config'
  return os.path.join(data_dir, 'samples', 'configs', config_basename)
def _get_labelmap_path(): def _get_labelmap_path():
"""Returns an absolute path to label map file.""" """Returns an absolute path to label map file."""
return os.path.join(tf.resource_loader.get_data_files_path(), 'data', return os.path.join(tf.resource_loader.get_data_files_path(), 'data',
...@@ -50,7 +60,7 @@ def _get_labelmap_path(): ...@@ -50,7 +60,7 @@ def _get_labelmap_path():
def _get_configs_for_model(model_name): def _get_configs_for_model(model_name):
"""Returns configurations for model.""" """Returns configurations for model."""
filename = model_test_util.GetPipelineConfigPath(model_name) filename = get_pipeline_config_path(model_name)
data_path = _get_data_path() data_path = _get_data_path()
label_map_path = _get_labelmap_path() label_map_path = _get_labelmap_path()
configs = config_util.get_configs_from_pipeline_file(filename) configs = config_util.get_configs_from_pipeline_file(filename)
...@@ -62,31 +72,37 @@ def _get_configs_for_model(model_name): ...@@ -62,31 +72,37 @@ def _get_configs_for_model(model_name):
return configs return configs
def setUpModule(): class ModelLibTest(tf.test.TestCase):
model_test_util.InitializeFlags(MODEL_NAME_FOR_TEST)
class ModelTflearnTest(tf.test.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
tf.reset_default_graph() tf.reset_default_graph()
def _assert_outputs_for_train_eval(self, configs, mode, class_agnostic=False): def _assert_model_fn_for_train_eval(self, configs, mode,
class_agnostic=False):
model_config = configs['model'] model_config = configs['model']
train_config = configs['train_config'] train_config = configs['train_config']
with tf.Graph().as_default(): with tf.Graph().as_default():
if mode == tf.estimator.ModeKeys.TRAIN: if mode == 'train':
features, labels = inputs.create_train_input_fn( features, labels = inputs.create_train_input_fn(
configs['train_config'], configs['train_config'],
configs['train_input_config'], configs['train_input_config'],
configs['model'])() configs['model'])()
model_mode = tf.estimator.ModeKeys.TRAIN
batch_size = train_config.batch_size batch_size = train_config.batch_size
else: elif mode == 'eval':
features, labels = inputs.create_eval_input_fn( features, labels = inputs.create_eval_input_fn(
configs['eval_config'], configs['eval_config'],
configs['eval_input_config'], configs['eval_input_config'],
configs['model'])() configs['model'])()
model_mode = tf.estimator.ModeKeys.EVAL
batch_size = 1
elif mode == 'eval_on_train':
features, labels = inputs.create_eval_input_fn(
configs['eval_config'],
configs['train_input_config'],
configs['model'])()
model_mode = tf.estimator.ModeKeys.EVAL
batch_size = 1 batch_size = 1
detection_model_fn = functools.partial( detection_model_fn = functools.partial(
...@@ -95,8 +111,8 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -95,8 +111,8 @@ class ModelTflearnTest(tf.test.TestCase):
hparams = model_hparams.create_hparams( hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
model_fn = model.create_model_fn(detection_model_fn, configs, hparams) model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams)
estimator_spec = model_fn(features, labels, mode) estimator_spec = model_fn(features, labels, model_mode)
self.assertIsNotNone(estimator_spec.loss) self.assertIsNotNone(estimator_spec.loss)
self.assertIsNotNone(estimator_spec.predictions) self.assertIsNotNone(estimator_spec.predictions)
...@@ -114,11 +130,11 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -114,11 +130,11 @@ class ModelTflearnTest(tf.test.TestCase):
self.assertEqual(batch_size, detection_scores.shape.as_list()[0]) self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
self.assertEqual(tf.float32, detection_scores.dtype) self.assertEqual(tf.float32, detection_scores.dtype)
self.assertEqual(tf.float32, num_detections.dtype) self.assertEqual(tf.float32, num_detections.dtype)
if mode == tf.estimator.ModeKeys.TRAIN: if model_mode == tf.estimator.ModeKeys.TRAIN:
self.assertIsNotNone(estimator_spec.train_op) self.assertIsNotNone(estimator_spec.train_op)
return estimator_spec return estimator_spec
def _assert_outputs_for_predict(self, configs): def _assert_model_fn_for_predict(self, configs):
model_config = configs['model'] model_config = configs['model']
with tf.Graph().as_default(): with tf.Graph().as_default():
...@@ -132,7 +148,7 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -132,7 +148,7 @@ class ModelTflearnTest(tf.test.TestCase):
hparams = model_hparams.create_hparams( hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false') hparams_overrides='load_pretrained=false')
model_fn = model.create_model_fn(detection_model_fn, configs, hparams) model_fn = model_lib.create_model_fn(detection_model_fn, configs, hparams)
estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT) estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT)
self.assertIsNone(estimator_spec.loss) self.assertIsNone(estimator_spec.loss)
...@@ -142,27 +158,146 @@ class ModelTflearnTest(tf.test.TestCase): ...@@ -142,27 +158,146 @@ class ModelTflearnTest(tf.test.TestCase):
self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME, self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
estimator_spec.export_outputs) estimator_spec.export_outputs)
def testModelFnInTrainMode(self): def test_model_fn_in_train_mode(self):
"""Tests the model function in TRAIN mode.""" """Tests the model function in TRAIN mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.TRAIN) self._assert_model_fn_for_train_eval(configs, 'train')
def testModelFnInEvalMode(self): def test_model_fn_in_eval_mode(self):
"""Tests the model function in EVAL mode.""" """Tests the model function in EVAL mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.EVAL) self._assert_model_fn_for_train_eval(configs, 'eval')
def testModelFnInPredictMode(self): def test_model_fn_in_eval_on_train_mode(self):
"""Tests the model function in PREDICT mode.""" """Tests the model function in EVAL mode with train data."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST) configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_outputs_for_predict(configs) self._assert_model_fn_for_train_eval(configs, 'eval_on_train')
def testExperiment(self): def test_model_fn_in_predict_mode(self):
"""Tests the model function in PREDICT mode."""
configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
self._assert_model_fn_for_predict(configs)
def test_create_estimator_and_inputs(self):
"""Tests that Estimator and input function are constructed correctly."""
run_config = tf.estimator.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
self.assertIn('train_input_fn', train_and_eval_dict)
self.assertIn('eval_input_fn', train_and_eval_dict)
self.assertIn('eval_on_train_input_fn', train_and_eval_dict)
def test_create_estimator_with_default_train_eval_steps(self):
"""Tests that number of train/eval defaults to config values."""
run_config = tf.estimator.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
config_train_steps = configs['train_config'].num_steps
config_eval_steps = configs['eval_config'].num_examples
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config, hparams, pipeline_config_path)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tf.estimator.Estimator)
self.assertEqual(config_train_steps, train_steps)
self.assertEqual(config_eval_steps, eval_steps)
def test_create_tpu_estimator_and_inputs(self):
"""Tests that number of train/eval defaults to config values."""
run_config = tpu_config.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps,
use_tpu_estimator=True)
estimator = train_and_eval_dict['estimator']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
self.assertIsInstance(estimator, tpu_estimator.TPUEstimator)
self.assertEqual(20, train_steps)
self.assertEqual(10, eval_steps)
def test_create_train_and_eval_specs(self):
"""Tests that `TrainSpec` and `EvalSpec` is created correctly."""
run_config = tf.estimator.RunConfig()
hparams = model_hparams.create_hparams(
hparams_overrides='load_pretrained=false')
pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
train_steps = 20
eval_steps = 10
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config,
hparams,
pipeline_config_path,
train_steps=train_steps,
eval_steps=eval_steps)
train_input_fn = train_and_eval_dict['train_input_fn']
eval_input_fn = train_and_eval_dict['eval_input_fn']
eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
predict_input_fn = train_and_eval_dict['predict_input_fn']
train_steps = train_and_eval_dict['train_steps']
eval_steps = train_and_eval_dict['eval_steps']
train_spec, eval_specs = model_lib.create_train_and_eval_specs(
train_input_fn,
eval_input_fn,
eval_on_train_input_fn,
predict_input_fn,
train_steps,
eval_steps,
eval_on_train_data=True,
final_exporter_name='exporter',
eval_spec_name='holdout')
self.assertEqual(train_steps, train_spec.max_steps)
self.assertEqual(2, len(eval_specs))
self.assertEqual(eval_steps, eval_specs[0].steps)
self.assertEqual('holdout', eval_specs[0].name)
self.assertEqual('exporter', eval_specs[0].exporters[0].name)
self.assertEqual(eval_steps, eval_specs[1].steps)
self.assertEqual('eval_on_train', eval_specs[1].name)
def test_experiment(self):
"""Tests that the `Experiment` object is constructed correctly.""" """Tests that the `Experiment` object is constructed correctly."""
experiment = model_test_util.BuildExperiment() run_config = tf.estimator.RunConfig()
model_dir = experiment.estimator.model_dir hparams = model_hparams.create_hparams(
pipeline_config_path = os.path.join(model_dir, 'pipeline.config') hparams_overrides='load_pretrained=false')
self.assertTrue(tf.gfile.Exists(pipeline_config_path)) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST)
experiment = model_lib.populate_experiment(
run_config,
hparams,
pipeline_config_path,
train_steps=10,
eval_steps=20)
self.assertEqual(10, experiment.train_steps)
self.assertEqual(20, experiment.eval_steps)
class UnbatchTensorsTest(tf.test.TestCase): class UnbatchTensorsTest(tf.test.TestCase):
...@@ -184,7 +319,7 @@ class UnbatchTensorsTest(tf.test.TestCase): ...@@ -184,7 +319,7 @@ class UnbatchTensorsTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_weights: fields.InputDataFields.groundtruth_weights:
groundtruth_weights_placeholder groundtruth_weights_placeholder
} }
unbatched_tensor_dict = model.unstack_batch( unbatched_tensor_dict = model_lib.unstack_batch(
tensor_dict, unpad_groundtruth_tensors=False) tensor_dict, unpad_groundtruth_tensors=False)
with self.test_session() as sess: with self.test_session() as sess:
...@@ -231,7 +366,7 @@ class UnbatchTensorsTest(tf.test.TestCase): ...@@ -231,7 +366,7 @@ class UnbatchTensorsTest(tf.test.TestCase):
fields.InputDataFields.num_groundtruth_boxes: fields.InputDataFields.num_groundtruth_boxes:
num_groundtruth_placeholder num_groundtruth_placeholder
} }
unbatched_tensor_dict = model.unstack_batch( unbatched_tensor_dict = model_lib.unstack_batch(
tensor_dict, unpad_groundtruth_tensors=True) tensor_dict, unpad_groundtruth_tensors=True)
with self.test_session() as sess: with self.test_session() as sess:
unbatched_tensor_dict_out = sess.run( unbatched_tensor_dict_out = sess.run(
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Binary to run train and evaluation on object detection model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import flags
import tensorflow as tf
from object_detection import model_hparams
from object_detection import model_lib
# Command-line flags for the train-and-evaluate entry point below.
flags.DEFINE_string(
    'model_dir', None, 'Path to output model directory '
    'where event and checkpoint files will be written.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                    'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
# Help text previously said 'Number of train steps.' (copy-paste error).
flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
flags.DEFINE_string(
    'hparams_overrides', None, 'Hyperparameter overrides, '
    'represented as a string containing comma-separated '
    'hparam_name=value pairs.')

FLAGS = flags.FLAGS
def main(unused_argv):
  """Builds the estimator from flags and runs train_and_evaluate."""
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  run_config = tf.estimator.RunConfig(model_dir=FLAGS.model_dir)

  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=run_config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps)
  estimator = train_and_eval_dict['estimator']

  train_spec, eval_specs = model_lib.create_train_and_eval_specs(
      train_and_eval_dict['train_input_fn'],
      train_and_eval_dict['eval_input_fn'],
      train_and_eval_dict['eval_on_train_input_fn'],
      train_and_eval_dict['predict_input_fn'],
      train_and_eval_dict['train_steps'],
      train_and_eval_dict['eval_steps'],
      eval_on_train_data=False)

  # Currently only a single Eval Spec is allowed.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utils for tests for object detection tflearn model."""
from __future__ import absolute_import
import os
import tempfile
import tensorflow as tf
from object_detection import model
from object_detection import model_hparams
FLAGS = tf.flags.FLAGS
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
def GetPipelineConfigPath(model_name):
  """Returns path to the local pipeline config file."""
  config_filename = model_name + '.config'
  base_dir = tf.resource_loader.get_data_files_path()
  return os.path.join(base_dir, 'samples', 'configs', config_filename)
def InitializeFlags(model_name_for_test):
  """Points test flags at the model's config and a scratch model dir."""
  FLAGS.pipeline_config_path = GetPipelineConfigPath(model_name_for_test)
  # mkdtemp gives each test run its own isolated model directory.
  FLAGS.model_dir = tempfile.mkdtemp()
def BuildExperiment():
  """Builds an Experiment object for testing purposes."""
  # pylint: disable=protected-access
  experiment_fn = model.build_experiment_fn(10, 10)
  # pylint: enable=protected-access
  hparams = model_hparams.create_hparams(
      hparams_overrides='load_pretrained=false')
  return experiment_fn(tf.contrib.learn.RunConfig(), hparams)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from tensorflow.contrib.training.python.training import evaluation
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.utils import config_util
# Command-line flags for the TPU train/eval binary; read via FLAGS below.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
tf.flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_zone',
    default=None,
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers. You must specify either '
    'this flag or --master.')
tf.flags.DEFINE_string(
    'master', default=None,
    help='GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You '
    'must specify either this flag or --tpu_name.')

# TPU topology / loop configuration.
tf.flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
tf.flags.DEFINE_integer('iterations_per_loop', 100,
                        'Number of iterations per TPU training loop.')
# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
tf.flags.DEFINE_string('mode', 'train_and_eval',
                       'Mode to run: train, eval, train_and_eval')
# 32 * 8: presumably per-core batch of 32 across 8 shards — TODO confirm.
tf.flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')

# For EVAL.
tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
                        'Minimum seconds between evaluations.')
tf.flags.DEFINE_integer(
    'eval_timeout_secs', None,
    'Maximum seconds between checkpoints before evaluation terminates.')
tf.flags.DEFINE_string('hparams_overrides', None, 'Comma-separated list of '
                       'hyperparameters to override defaults.')
tf.flags.DEFINE_boolean('eval_training_data', False,
                        'If training data should be evaluated for this job.')

FLAGS = tf.flags.FLAGS
def create_estimator(run_config,
                     hparams,
                     pipeline_config_path,
                     train_steps=None,
                     eval_steps=None,
                     train_batch_size=None,
                     model_fn_creator=model.create_model_fn,
                     use_tpu=False,
                     num_shards=1,
                     params=None,
                     **kwargs):
  """Creates a `TPUEstimator` object along with input functions and step counts.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training steps
      is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    train_batch_size: Training batch size. If none, use batch size from
      `TrainConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel` instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    use_tpu: Boolean, whether training and evaluation should run on TPU.
    num_shards: Number of shards (TPU cores).
    params: Parameter dictionary passed from the estimator.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    Estimator: A estimator object used for training and evaluation
    train_input_fn: Input function for the training loop
    eval_validation_input_fn: Input function to run for evaluation on
      validation data.
    eval_training_input_fn: Input function to run for evaluation on
      training data.
    train_steps: Number of training steps either from arg `train_steps` or
      `TrainConfig` proto
    eval_steps: Number of evaluation steps either from arg `eval_steps` or
      `EvalConfig` proto
  """
  # Load the pipeline config and fold in hparams/kwargs overrides.
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(
      configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      batch_size=train_batch_size,
      **kwargs)
  model_config = configs['model']
  train_config = configs['train_config']
  train_input_config = configs['train_input_config']
  eval_config = configs['eval_config']
  eval_input_config = configs['eval_input_config']
  # NOTE(review): reads the global FLAGS.eval_training_data inside a library
  # function, coupling it to this binary's flags — consider a parameter.
  if FLAGS.eval_training_data:
    eval_input_config = configs['train_input_config']

  if params is None:
    params = {}

  # Fall back to step counts from the (merged) pipeline config when the
  # caller did not supply explicit values.
  if train_steps is None and train_config.num_steps:
    train_steps = train_config.num_steps

  if eval_steps is None and eval_config.num_examples:
    eval_steps = eval_config.num_examples

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Create the input functions for TRAIN/EVAL.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=train_input_config,
      model_config=model_config)
  eval_validation_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=eval_input_config,
      model_config=model_config)
  eval_training_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=train_input_config,
      model_config=model_config)

  estimator = tpu_estimator.TPUEstimator(
      model_fn=model_fn_creator(detection_model_fn, configs, hparams,
                                use_tpu),
      train_batch_size=train_config.batch_size,
      # For each core, only batch size 1 is supported for eval.
      eval_batch_size=num_shards * 1 if use_tpu else 1,
      use_tpu=use_tpu,
      config=run_config,
      params=params)
  return (estimator, train_input_fn, eval_validation_input_fn,
          eval_training_input_fn, train_steps, eval_steps)
def main(unused_argv):
  """Builds a TPU estimator from flags and runs train and/or eval.

  The flow is selected by FLAGS.mode:
    * 'train': train up to `train_steps`.
    * 'train_and_eval': train, then evaluate once on validation data.
    * 'eval': continuously evaluate each new checkpoint written to
      FLAGS.model_dir until the final training step is reached or the
      checkpoint iterator times out.

  Args:
    unused_argv: unused positional command-line arguments (required by the
      tf.app.run entry-point convention).

  Raises:
    RuntimeError: if neither --master nor --tpu_name is provided.
  """
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')
  # Resolve the TPU master gRPC address: an explicit --master takes
  # precedence over resolving --tpu_name via the cluster resolver.
  if FLAGS.master is None and FLAGS.tpu_name is None:
    raise RuntimeError('You must specify either --master or --tpu_name.')
  if FLAGS.master is not None:
    if FLAGS.tpu_name is not None:
      tf.logging.warn('Both --master and --tpu_name are set. Ignoring '
                      '--tpu_name and using --master.')
    tpu_grpc_url = FLAGS.master
  else:
    tpu_cluster_resolver = (
        tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
            tpu_names=[FLAGS.tpu_name],
            zone=FLAGS.tpu_zone,
            project=FLAGS.gcp_project))
    tpu_grpc_url = tpu_cluster_resolver.get_master()
  # The same address serves both training and evaluation masters.
  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
  # Extra params dict passed through to the estimator's model_fn; populated
  # (if at all) inside create_estimator.
  params = {}
  (estimator, train_input_fn, eval_validation_input_fn, eval_training_input_fn,
   train_steps, eval_steps) = (
       create_estimator(
           config,
           model_hparams.create_hparams(
               hparams_overrides=FLAGS.hparams_overrides),
           FLAGS.pipeline_config_path,
           train_steps=FLAGS.num_train_steps,
           eval_steps=FLAGS.num_eval_steps,
           train_batch_size=FLAGS.train_batch_size,
           use_tpu=FLAGS.use_tpu,
           num_shards=FLAGS.num_shards,
           params=params))
  if FLAGS.mode in ['train', 'train_and_eval']:
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  if FLAGS.mode == 'train_and_eval':
    # Eval one time.
    eval_results = estimator.evaluate(
        input_fn=eval_validation_input_fn, steps=eval_steps)
    tf.logging.info('Eval results: %s' % eval_results)
  # Continuously evaluating.
  if FLAGS.mode == 'eval':
    def terminate_eval():
      # timeout_fn for checkpoints_iterator: returning True stops iteration
      # after FLAGS.eval_timeout_secs without a new checkpoint.
      tf.logging.info('Terminating eval after %d seconds of no checkpoints' %
                      FLAGS.eval_timeout_secs)
      return True
    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir,
        min_interval_secs=FLAGS.min_eval_interval_secs,
        timeout=FLAGS.eval_timeout_secs,
        timeout_fn=terminate_eval):
      tf.logging.info('Starting to evaluate.')
      # Choose which split to evaluate; `name` namespaces the eval metrics.
      if FLAGS.eval_training_data:
        name = 'training_data'
        input_fn = eval_training_input_fn
      else:
        name = 'validation_data'
        input_fn = eval_validation_input_fn
      try:
        eval_results = estimator.evaluate(
            input_fn=input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name=name)
        tf.logging.info('Eval results: %s' % eval_results)
        # Terminate eval job when final checkpoint is reached
        # NOTE(review): assumes checkpoint basenames look like
        # '<prefix>-<global_step>'; confirm against checkpoint naming.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % current_step)
          break
      except tf.errors.NotFoundError:
        # The checkpoint may have been garbage-collected between discovery
        # and evaluation; skip it and wait for the next one.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
if __name__ == '__main__':
  # tf.app.run parses command-line flags and then dispatches to main().
  tf.app.run()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import flags
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from object_detection import model_hparams
from object_detection import model_lib
# Command-line flags configuring the TPU training/evaluation run.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
flags.DEFINE_string(
    'tpu_zone',
    default=None,
    # Fixed copy-paste in help text: this flag controls the zone, not the
    # project.
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE zone from metadata.')
flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers.')

flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
flags.DEFINE_integer('iterations_per_loop', 100,
                     'Number of iterations per TPU training loop.')
# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
flags.DEFINE_string('mode', 'train',
                    'Mode to run: train, eval')
flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')
flags.DEFINE_string(
    'hparams_overrides', None, 'Comma-separated list of '
    'hyperparameters to override defaults.')
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
flags.DEFINE_string(
    'model_dir', None, 'Path to output model directory '
    'where event and checkpoint files will be written.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                    'file.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
# Fixed copy-paste in help text: this flag controls eval steps, not train
# steps.
flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')

FLAGS = tf.flags.FLAGS
def main(unused_argv):
  """Creates a TPU estimator via model_lib and runs training or eval.

  FLAGS.mode selects the flow:
    * 'train': train up to `train_steps`.
    * 'eval': continuously evaluate checkpoints written to FLAGS.model_dir
      (on training or validation data per FLAGS.eval_training_data).

  Args:
    unused_argv: unused positional command-line arguments (required by the
      tf.app.run entry-point convention).
  """
  flags.mark_flag_as_required('model_dir')
  flags.mark_flag_as_required('pipeline_config_path')
  # Resolve the TPU master gRPC address from the --tpu_name flag.
  tpu_cluster_resolver = (
      tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
          tpu_names=[FLAGS.tpu_name],
          zone=FLAGS.tpu_zone,
          project=FLAGS.gcp_project))
  tpu_grpc_url = tpu_cluster_resolver.get_master()
  # The same address serves both training and evaluation masters.
  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
  # model_lib wires up the estimator plus the train/eval input pipelines
  # from the pipeline config; use_tpu_estimator selects TPUEstimator.
  train_and_eval_dict = model_lib.create_estimator_and_inputs(
      run_config=config,
      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
      pipeline_config_path=FLAGS.pipeline_config_path,
      train_steps=FLAGS.num_train_steps,
      eval_steps=FLAGS.num_eval_steps,
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
      batch_size=FLAGS.train_batch_size)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
  eval_input_fn = train_and_eval_dict['eval_input_fn']
  eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
  train_steps = train_and_eval_dict['train_steps']
  eval_steps = train_and_eval_dict['eval_steps']
  if FLAGS.mode == 'train':
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  # Continuously evaluating.
  if FLAGS.mode == 'eval':
    # Choose which split to evaluate; `name` namespaces the eval metrics.
    if FLAGS.eval_training_data:
      name = 'training_data'
      input_fn = eval_on_train_input_fn
    else:
      name = 'validation_data'
      input_fn = eval_input_fn
    # Blocks, evaluating each new checkpoint until training completes.
    model_lib.continuous_eval(estimator, FLAGS.model_dir, input_fn, eval_steps,
                              train_steps, name)
if __name__ == '__main__':
  # tf.app.run parses command-line flags and then dispatches to main().
  tf.app.run()
...@@ -17,16 +17,16 @@ ...@@ -17,16 +17,16 @@
import tensorflow as tf import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.models import ssd_mobilenet_v1_feature_extractor from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from nets import mobilenet_v1 from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
class EmbeddedSSDMobileNetV1FeatureExtractor( class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor):
"""Embedded-friendly SSD Feature Extractor using MobilenetV1 features. """Embedded-friendly SSD Feature Extractor using MobilenetV1 features.
This feature extractor is similar to SSD MobileNetV1 feature extractor, and This feature extractor is similar to SSD MobileNetV1 feature extractor, and
...@@ -49,12 +49,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -49,12 +49,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models. """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args: Args:
...@@ -63,20 +62,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -63,20 +62,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. For EmbeddedSSD it must be set to 1. width dimensions to. For EmbeddedSSD it must be set to 1.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises: Raises:
ValueError: upon invalid `pad_to_multiple` values. ValueError: upon invalid `pad_to_multiple` values.
...@@ -87,10 +82,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -87,10 +82,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__( super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
def _extract_features(self, preprocessed_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -130,16 +140,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -130,16 +140,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('MobilenetV1',
with slim.arg_scope([slim.batch_norm], fused=False): reuse=self._reuse_weights) as scope:
with tf.variable_scope('MobilenetV1', with slim.arg_scope(
reuse=self._reuse_weights) as scope: mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
_, image_features = mobilenet_v1.mobilenet_v1_base( with (slim.arg_scope(self._conv_hyperparams_fn())
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), if self._override_base_feature_extractor_hyperparams
final_endpoint='Conv2d_13_pointwise', else context_manager.IdentityContextManager()):
min_depth=self._min_depth, # TODO(skligys): Enable fused batch norm once quantization supports it.
depth_multiplier=self._depth_multiplier, with slim.arg_scope([slim.batch_norm], fused=False):
scope=scope) _, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout, feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
......
...@@ -25,7 +25,7 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -33,18 +33,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -33,18 +33,16 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: whether to update batch norm parameters during
training.
Returns: Returns:
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return (embedded_ssd_mobilenet_v1_feature_extractor. return (embedded_ssd_mobilenet_v1_feature_extractor.
EmbeddedSSDMobileNetV1FeatureExtractor( EmbeddedSSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable)) self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True))
def test_extract_features_returns_correct_shapes_256(self): def test_extract_features_returns_correct_shapes_256(self):
image_height = 256 image_height = 256
......
...@@ -17,11 +17,30 @@ ...@@ -17,11 +17,30 @@
import tensorflow as tf import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.utils import shape_utils
from nets import mobilenet_v1 from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
_MOBILENET_V1_100_CONV_NO_LAST_STRIDE_DEFS = [
mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=32),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=64),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=128),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=128),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=256),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=1024),
mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=1024)
]
class FasterRCNNMobilenetV1FeatureExtractor( class FasterRCNNMobilenetV1FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN Mobilenet V1 feature extractor implementation.""" """Faster R-CNN Mobilenet V1 feature extractor implementation."""
...@@ -33,7 +52,8 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -33,7 +52,8 @@ class FasterRCNNMobilenetV1FeatureExtractor(
reuse_weights=None, reuse_weights=None,
weight_decay=0.0, weight_decay=0.0,
depth_multiplier=1.0, depth_multiplier=1.0,
min_depth=16): min_depth=16,
skip_last_stride=False):
"""Constructor. """Constructor.
Args: Args:
...@@ -44,6 +64,7 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -44,6 +64,7 @@ class FasterRCNNMobilenetV1FeatureExtractor(
weight_decay: See base class. weight_decay: See base class.
depth_multiplier: float depth multiplier for feature extractor. depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
skip_last_stride: Skip the last stride if True.
Raises: Raises:
ValueError: If `first_stage_features_stride` is not 8 or 16. ValueError: If `first_stage_features_stride` is not 8 or 16.
...@@ -52,6 +73,7 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -52,6 +73,7 @@ class FasterRCNNMobilenetV1FeatureExtractor(
raise ValueError('`first_stage_features_stride` must be 8 or 16.') raise ValueError('`first_stage_features_stride` must be 8 or 16.')
self._depth_multiplier = depth_multiplier self._depth_multiplier = depth_multiplier
self._min_depth = min_depth self._min_depth = min_depth
self._skip_last_stride = skip_last_stride
super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__( super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__(
is_training, first_stage_features_stride, batch_norm_trainable, is_training, first_stage_features_stride, batch_norm_trainable,
reuse_weights, weight_decay) reuse_weights, weight_decay)
...@@ -91,24 +113,25 @@ class FasterRCNNMobilenetV1FeatureExtractor( ...@@ -91,24 +113,25 @@ class FasterRCNNMobilenetV1FeatureExtractor(
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert( preprocessed_inputs = shape_utils.check_min_image_dim(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), min_dim=33, image_tensor=preprocessed_inputs)
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.']) with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
with tf.control_dependencies([shape_assert]): is_training=self._train_batch_norm,
with slim.arg_scope( weight_decay=self._weight_decay)):
mobilenet_v1.mobilenet_v1_arg_scope( with tf.variable_scope('MobilenetV1',
is_training=self._train_batch_norm, reuse=self._reuse_weights) as scope:
weight_decay=self._weight_decay)): params = {}
with tf.variable_scope('MobilenetV1', if self._skip_last_stride:
reuse=self._reuse_weights) as scope: params['conv_defs'] = _MOBILENET_V1_100_CONV_NO_LAST_STRIDE_DEFS
_, activations = mobilenet_v1.mobilenet_v1_base( _, activations = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs, preprocessed_inputs,
final_endpoint='Conv2d_11_pointwise', final_endpoint='Conv2d_11_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
scope=scope) scope=scope,
**params)
return activations['Conv2d_11_pointwise'], activations return activations['Conv2d_11_pointwise'], activations
def _extract_box_classifier_features(self, proposal_feature_maps, scope): def _extract_box_classifier_features(self, proposal_feature_maps, scope):
......
...@@ -26,6 +26,10 @@ from object_detection.utils import test_case ...@@ -26,6 +26,10 @@ from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase): class SsdFeatureExtractorTestBase(test_case.TestCase):
def conv_hyperparams_fn(self):
with tf.contrib.slim.arg_scope([]) as sc:
return sc
@abstractmethod @abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False): use_explicit_padding=False):
......
...@@ -33,12 +33,11 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -33,12 +33,11 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""InceptionV2 Feature Extractor for SSD Models. """InceptionV2 Feature Extractor for SSD Models.
Args: Args:
...@@ -47,25 +46,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -47,25 +46,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters. Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
""" """
super(SSDInceptionV2FeatureExtractor, self).__init__( super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V2 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -82,7 +86,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -82,7 +86,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -103,7 +107,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -103,7 +107,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV2', with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base( _, image_features = inception_v2.inception_v2_base(
......
...@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a SsdInceptionV2FeatureExtractor. """Constructs a SsdInceptionV2FeatureExtractor.
Args: Args:
...@@ -33,16 +33,15 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -33,16 +33,15 @@ class SsdInceptionV2FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns: Returns:
an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor. an ssd_inception_v2_feature_extractor.SsdInceptionV2FeatureExtractor.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor( return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable) self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
image_height = 128 image_height = 128
......
...@@ -33,12 +33,11 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -33,12 +33,11 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""InceptionV3 Feature Extractor for SSD Models. """InceptionV3 Feature Extractor for SSD Models.
Args: Args:
...@@ -47,25 +46,31 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -47,25 +46,31 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters. Raises:
ValueError: If `override_base_feature_extractor_hyperparams` is False.
""" """
super(SSDInceptionV3FeatureExtractor, self).__init__( super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
if not self._override_base_feature_extractor_hyperparams:
raise ValueError('SSD Inception V3 feature extractor always uses'
'scope returned by `conv_hyperparams_fn` for both the '
'base feature extractor and the additional layers '
'added since there is no arg_scope defined for the base '
'feature extractor.')
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -82,7 +87,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -82,7 +87,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -103,7 +108,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -103,7 +108,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
'use_depthwise': self._use_depthwise, 'use_depthwise': self._use_depthwise,
} }
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope: with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
_, image_features = inception_v3.inception_v3_base( _, image_features = inception_v3.inception_v3_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
......
...@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -25,7 +25,7 @@ class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True):
"""Constructs a SsdInceptionV3FeatureExtractor. """Constructs a SsdInceptionV3FeatureExtractor.
Args: Args:
...@@ -33,16 +33,15 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -33,16 +33,15 @@ class SsdInceptionV3FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not
Returns: Returns:
an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor. an ssd_inception_v3_feature_extractor.SsdInceptionV3FeatureExtractor.
""" """
min_depth = 32 min_depth = 32
conv_hyperparams = {}
return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor( return ssd_inception_v3_feature_extractor.SSDInceptionV3FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable) self.conv_hyperparams_fn,
override_base_feature_extractor_hyperparams=True)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
image_height = 128 image_height = 128
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
from nets import mobilenet_v1 from nets import mobilenet_v1
...@@ -34,12 +35,11 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,12 +35,11 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""MobileNetV1 Feature Extractor for SSD Models. """MobileNetV1 Feature Extractor for SSD Models.
Args: Args:
...@@ -48,26 +48,22 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -48,26 +48,22 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDMobileNetV1FeatureExtractor, self).__init__( super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -84,7 +80,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -84,7 +80,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -110,17 +106,20 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -110,17 +106,20 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
with slim.arg_scope( with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(
is_training=(self._batch_norm_trainable and self._is_training))): is_training=None, regularize_depthwise=True)):
with (slim.arg_scope(self._conv_hyperparams_fn())
if self._override_base_feature_extractor_hyperparams
else context_manager.IdentityContextManager()):
# TODO(skligys): Enable fused batch norm once quantization supports it. # TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise', final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding, use_explicit_padding=self._use_explicit_padding,
scope=scope) scope=scope)
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(skligys): Enable fused batch norm once quantization supports it. # TODO(skligys): Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
......
...@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -27,8 +27,7 @@ class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True, is_training=True, use_explicit_padding=False):
use_explicit_padding=False):
"""Constructs a new feature extractor. """Constructs a new feature extractor.
Args: Args:
...@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -36,8 +35,6 @@ class SsdMobilenetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
is_training: whether the network is in training mode. is_training: whether the network is in training mode.
batch_norm_trainable: Whether to update batch norm parameters during
training or not.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding inputs so that the output dimensions are the same as if 'SAME' padding
were used. were used.
...@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -45,11 +42,9 @@ class SsdMobilenetV1FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor( return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable=batch_norm_trainable, self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding) use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
from nets.mobilenet import mobilenet from nets.mobilenet import mobilenet
...@@ -35,12 +36,11 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -35,12 +36,11 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams_fn,
batch_norm_trainable=True,
reuse_weights=None, reuse_weights=None,
use_explicit_padding=False, use_explicit_padding=False,
use_depthwise=False, use_depthwise=False,
inplace_batchnorm_update=False): override_base_feature_extractor_hyperparams=False):
"""MobileNetV2 Feature Extractor for SSD Models. """MobileNetV2 Feature Extractor for SSD Models.
Mobilenet v2 (experimental), designed by sandler@. More details can be found Mobilenet v2 (experimental), designed by sandler@. More details can be found
...@@ -52,25 +52,21 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -52,25 +52,21 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
min_depth: minimum feature extractor depth. min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops. conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
batch_norm_trainable: Whether to update batch norm parameters during and separable_conv2d ops in the layers that are added on top of the
training or not. When training with a small batch size base feature extractor.
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False. use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during override_base_feature_extractor_hyperparams: Whether to override
training. This is required for batch norm to work correctly on TPUs. hyperparameters of the base feature extractor with the one from
When this is false, user must add a control dependency on `conv_hyperparams_fn`.
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
""" """
super(SSDMobileNetV2FeatureExtractor, self).__init__( super(SSDMobileNetV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights, conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
use_explicit_padding, use_depthwise, inplace_batchnorm_update) override_base_feature_extractor_hyperparams)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -87,7 +83,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -87,7 +83,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
""" """
return (2.0 / 255.0) * resized_inputs - 1.0 return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
Args: Args:
...@@ -110,21 +106,22 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -110,21 +106,22 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
with slim.arg_scope( with slim.arg_scope(
mobilenet_v2.training_scope( mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
is_training=(self._is_training and self._batch_norm_trainable),
bn_decay=0.9997)), \
slim.arg_scope( slim.arg_scope(
[mobilenet.depth_multiplier], min_depth=self._min_depth): [mobilenet.depth_multiplier], min_depth=self._min_depth):
# TODO(b/68150321): Enable fused batch norm once quantization with (slim.arg_scope(self._conv_hyperparams_fn())
# supports it. if self._override_base_feature_extractor_hyperparams else
with slim.arg_scope([slim.batch_norm], fused=False): context_manager.IdentityContextManager()):
_, image_features = mobilenet_v2.mobilenet_base( # TODO(b/68150321): Enable fused batch norm once quantization
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), # supports it.
final_endpoint='layer_19', with slim.arg_scope([slim.batch_norm], fused=False):
depth_multiplier=self._depth_multiplier, _, image_features = mobilenet_v2.mobilenet_base(
use_explicit_padding=self._use_explicit_padding, ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
scope=scope) final_endpoint='layer_19',
with slim.arg_scope(self._conv_hyperparams): depth_multiplier=self._depth_multiplier,
use_explicit_padding=self._use_explicit_padding,
scope=scope)
with slim.arg_scope(self._conv_hyperparams_fn()):
# TODO(b/68150321): Enable fused batch norm once quantization # TODO(b/68150321): Enable fused batch norm once quantization
# supports it. # supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
......
...@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest( ...@@ -41,14 +41,12 @@ class SsdMobilenetV2FeatureExtractorTest(
an ssd_meta_arch.SSDFeatureExtractor object. an ssd_meta_arch.SSDFeatureExtractor object.
""" """
min_depth = 32 min_depth = 32
with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm) as sc:
conv_hyperparams = sc
return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor( return ssd_mobilenet_v2_feature_extractor.SSDMobileNetV2FeatureExtractor(
False, False,
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple, pad_to_multiple,
conv_hyperparams, self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding) use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self): def test_extract_features_returns_correct_shapes_128(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment