Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Experiment` for object detection model.
This uses the TF.learn framework to define and run an object detection model
wrapped in an `Estimator`.
Note that this module is only compatible with SSD Meta architecture at the
moment.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from object_detection import eval_util
from object_detection import inputs
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.builders import optimizer_builder
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
from object_detection.utils import variables_helper
from object_detection.utils import visualization_utils as vis_utils
# Command-line flags for the training/evaluation driver.
tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
                       'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
                       'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
# BUGFIX: help string previously said "train steps" (copy-paste error).
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of eval steps.')
FLAGS = tf.flags.FLAGS
def _get_groundtruth_data(detection_model, class_agnostic):
  """Collects groundtruth tensors registered on `detection_model`.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.

  Returns:
    A dictionary keyed by `fields.InputDataFields` names containing:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_masks': 3D float32 tensor of instance masks, present only
        when masks were provided as groundtruth.
  """
  input_fields = fields.InputDataFields()
  boxes = detection_model.groundtruth_lists(fields.BoxListFields.boxes)[0]
  if class_agnostic:
    # For class-agnostic models the one-hot encoding collapses to a single
    # all-ones column.
    num_boxes = tf.shape(boxes)[0]
    one_hot_classes = tf.ones([num_boxes, 1])
  else:
    one_hot_classes = detection_model.groundtruth_lists(
        fields.BoxListFields.classes)[0]
  # Shift the 0-indexed argmax to 1-indexed class labels (b/63711816).
  label_id_offset = 1
  class_labels = tf.argmax(one_hot_classes, axis=1) + label_id_offset
  groundtruth = {
      input_fields.groundtruth_boxes: boxes,
      input_fields.groundtruth_classes: class_labels,
  }
  if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
    groundtruth[input_fields.groundtruth_instance_masks] = (
        detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
  return groundtruth
def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
  """Unstacks all tensors in `tensor_dict` along 0th dimension.

  Unstacks tensor from the tensor dict along 0th dimension and returns a
  tensor_dict containing values that are lists of unstacked tensors.

  Tensors in the `tensor_dict` are expected to be of one of the three shapes:
  1. [batch_size]
  2. [batch_size, height, width, channels]
  3. [batch_size, num_boxes, d1, d2, ... dn]

  When unpad_groundtruth_tensors is true, unstacked tensors of form 3 above
  are sliced along the `num_boxes` dimension using the value in tensor
  field.InputDataFields.num_groundtruth_boxes.

  Note that this function has a static list of input data fields and has to be
  kept in sync with the InputDataFields defined in core/standard_fields.py

  Args:
    tensor_dict: A dictionary of batched groundtruth tensors.
    unpad_groundtruth_tensors: Whether to remove padding along `num_boxes`
      dimension of the groundtruth tensors.

  Returns:
    A dictionary where the keys are from fields.InputDataFields and values are
    a list of unstacked (optionally unpadded) tensors.

  Raises:
    ValueError: If unpad_groundtruth_tensors is True and `tensor_dict` does
      not contain `num_groundtruth_boxes` tensor.
  """
  unbatched = {key: tf.unstack(batched)
               for key, batched in tensor_dict.items()}
  if not unpad_groundtruth_tensors:
    return unbatched

  num_boxes_key = fields.InputDataFields.num_groundtruth_boxes
  if num_boxes_key not in unbatched:
    raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
                     'Keys available: {}'.format(
                         unbatched.keys()))
  # Input data fields padded along the num_boxes dimension. Must be kept in
  # sync with InputDataFields in standard_fields.py.
  padded_fields = {
      fields.InputDataFields.groundtruth_instance_masks,
      fields.InputDataFields.groundtruth_classes,
      fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_group_of,
      fields.InputDataFields.groundtruth_difficult,
      fields.InputDataFields.groundtruth_is_crowd,
      fields.InputDataFields.groundtruth_area,
      fields.InputDataFields.groundtruth_weights,
  }
  for key in padded_fields & set(unbatched):
    unpadded_tensors = []
    for num_gt, padded in zip(unbatched[num_boxes_key], unbatched[key]):
      padded_shape = shape_utils.combined_static_and_dynamic_shape(padded)
      # Slice [0:num_gt] on the boxes axis; keep every other axis whole
      # (-1 marks dynamic dims).
      begin = tf.zeros([len(padded_shape)], dtype=tf.int32)
      size = tf.stack(
          [num_gt] + [-1 if dim is None else dim for dim in padded_shape[1:]])
      unpadded_tensors.append(tf.slice(padded, begin, size))
    unbatched[key] = unpadded_tensors
  return unbatched
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
  """Creates a model function for `Estimator`.

  Args:
    detection_model_fn: Function that returns a `DetectionModel` instance.
    configs: Dictionary of pipeline config objects.
    hparams: `HParams` object.
    use_tpu: Boolean indicating whether model should be constructed for
      use on TPU.

  Returns:
    `model_fn` for `Estimator`.
  """
  train_config = configs['train_config']
  eval_input_config = configs['eval_input_config']

  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # Eval uses batch size 1 and keeps padded tensors intact.
      labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        # Restore variables that are available in the fine-tune checkpoint;
        # variables without a counterpart keep their fresh initialization.
        asg_map = detection_model.restore_map(
            from_detection_checkpoint=train_config.from_detection_checkpoint,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
          # TPUEstimator requires checkpoint restoration to happen inside a
          # scaffold function.
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()
          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      # BUGFIX: `dict.itervalues()` does not exist on Python 3 and would raise
      # AttributeError; `list(values())` works on both Python 2 and 3.
      losses = list(losses_dict.values())
      total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)
      if use_tpu:
        training_optimizer = tpu_optimizer.CrossShardOptimizer(
            training_optimizer)
      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)
      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm
      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      # Summaries are not supported on TPU; pass an empty list to suppress
      # the defaults optimize_loss would otherwise add.
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      # Detection summaries during eval.
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
      eval_dict = eval_util.result_dict_for_single_example(
          tf.expand_dims(features[fields.InputDataFields.original_image][0], 0),
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=False)
      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
          eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
      if not use_tpu:
        tf.summary.image('Detections_Left_Groundtruth_Right',
                         detection_and_groundtruth)
      # Eval metrics on a single image.
      detection_fields = fields.DetectionResultFields()
      input_data_fields = fields.InputDataFields()
      coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
          category_index.values())
      eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
          image_id=eval_dict[input_data_fields.key],
          groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[input_data_fields.groundtruth_classes],
          detection_boxes=eval_dict[detection_fields.detection_boxes],
          detection_scores=eval_dict[detection_fields.detection_scores],
          detection_classes=eval_dict[detection_fields.detection_classes])

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)

  return model_fn
def _build_experiment_fn(train_steps, eval_steps):
  """Returns a function that creates an `Experiment`."""

  def build_experiment(run_config, hparams):
    """Builds an `Experiment` from configuration and hyperparameters.

    Args:
      run_config: A `RunConfig`.
      hparams: A `HParams`.

    Returns:
      An `Experiment` object.
    """
    # Step counts are closed over; everything else comes from the flags-level
    # pipeline config.
    return populate_experiment(
        run_config,
        hparams,
        FLAGS.pipeline_config_path,
        train_steps,
        eval_steps)

  return build_experiment
def populate_experiment(run_config,
                        hparams,
                        pipeline_config_path,
                        train_steps=None,
                        eval_steps=None,
                        model_fn_creator=create_model_fn,
                        **kwargs):
  """Populates an `Experiment` object.

  Args:
    run_config: A `RunConfig`.
    hparams: A `HParams`.
    pipeline_config_path: A path to a pipeline config file.
    train_steps: Number of training steps. If None, the number of training
      steps is set from the `TrainConfig` proto.
    eval_steps: Number of evaluation steps per evaluation cycle. If None, the
      number of evaluation steps is set from the `EvalConfig` proto.
    model_fn_creator: A function that creates a `model_fn` for `Estimator`.
      Follows the signature:
      * Args:
        * `detection_model_fn`: Function that returns `DetectionModel`
          instance.
        * `configs`: Dictionary of pipeline config objects.
        * `hparams`: `HParams` object.
      * Returns:
        `model_fn` for `Estimator`.
    **kwargs: Additional keyword arguments for configuration override.

  Returns:
    An `Experiment` that defines all aspects of training, evaluation, and
    export.
  """
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(
      configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      **kwargs)

  model_config = configs['model']
  train_config = configs['train_config']
  eval_config = configs['eval_config']

  # Fall back to the step counts in the pipeline config when the caller did
  # not supply them; a zero in the proto means "unspecified".
  if train_steps is None:
    train_steps = train_config.num_steps if train_config.num_steps else None
  if eval_steps is None:
    eval_steps = eval_config.num_examples if eval_config.num_examples else None

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Input functions for TRAIN and EVAL.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=configs['train_input_config'],
      model_config=model_config)
  eval_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=configs['eval_input_config'],
      model_config=model_config)

  export_strategies = [
      tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
          serving_input_fn=inputs.create_predict_input_fn(
              model_config=model_config))
  ]

  estimator = tf.estimator.Estimator(
      model_fn=model_fn_creator(detection_model_fn, configs, hparams),
      config=run_config)

  if run_config.is_chief:
    # Persist the as-run pipeline config next to the checkpoints for
    # traceability.
    final_pipeline_proto = config_util.create_pipeline_proto_from_configs(
        configs)
    final_config_path = os.path.join(estimator.model_dir, 'pipeline.config')
    serialized_config = text_format.MessageToString(final_pipeline_proto)
    with tf.gfile.Open(final_config_path, 'wb') as f:
      tf.logging.info('Writing as-run pipeline config file to %s',
                      final_config_path)
      f.write(serialized_config)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=train_steps,
      eval_steps=eval_steps,
      export_strategies=export_strategies,
      eval_delay_secs=120,)
def main(unused_argv):
  """Entry point: builds the experiment from flags and runs it."""
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')
  run_config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
  experiment_fn = _build_experiment_fn(FLAGS.num_train_steps,
                                       FLAGS.num_eval_steps)
  learn_runner.run(
      experiment_fn=experiment_fn,
      run_config=run_config,
      hparams=model_hparams.create_hparams())


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hyperparameters for the object detection model in TF.learn.
This file consolidates and documents the hyperparameters used by the model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def create_hparams(hparams_overrides=None):
  """Returns hyperparameters, including any flag value overrides.

  Args:
    hparams_overrides: Optional hparams overrides, represented as a
      string containing comma-separated hparam_name=value pairs.

  Returns:
    The hyperparameters as a tf.HParams object.
  """
  hparams = tf.contrib.training.HParams(
      # load_pretrained: whether the fine-tuning checkpoint named in the
      # pipeline config should be restored before training.
      load_pretrained=True)
  # Apply caller-supplied overrides on top of the defaults above.
  if hparams_overrides:
    hparams = hparams.parse(hparams_overrides)
  return hparams
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object detection model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import numpy as np
import tensorflow as tf
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection import model_test_util
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
FLAGS = tf.flags.FLAGS
# The SSD Inception pets config serves as the representative model under test.
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
  """Returns an absolute path to TFRecord file."""
  test_base = os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE)
  return os.path.join(test_base, 'test_data', 'pets_examples.record')
def _get_labelmap_path():
  """Returns an absolute path to label map file."""
  test_base = os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE)
  return os.path.join(test_base, 'data', 'pet_label_map.pbtxt')
def _get_configs_for_model(model_name):
  """Returns configurations for model."""
  pipeline_config_file = model_test_util.GetPipelineConfigPath(model_name)
  record_path = _get_data_path()
  labelmap_path = _get_labelmap_path()
  configs = config_util.get_configs_from_pipeline_file(pipeline_config_file)
  # Point both input readers at the bundled test TFRecord and label map.
  return config_util.merge_external_params_with_configs(
      configs,
      train_input_path=record_path,
      eval_input_path=record_path,
      label_map_path=labelmap_path)
def setUpModule():
  # Point the model_dir / pipeline_config_path flags at the model under test
  # before any test in this module runs.
  model_test_util.InitializeFlags(MODEL_NAME_FOR_TEST)
class ModelTflearnTest(tf.test.TestCase):
  """Smoke tests for the TF.learn object detection `model_fn`."""

  @classmethod
  def setUpClass(cls):
    # Start from a clean graph so state from other modules cannot leak in.
    tf.reset_default_graph()

  def _assert_outputs_for_train_eval(self, configs, mode, class_agnostic=False):
    """Builds the model_fn in `mode` and checks its `EstimatorSpec` outputs.

    Args:
      configs: Dictionary of pipeline config objects.
      mode: tf.estimator.ModeKeys.TRAIN or EVAL.
      class_agnostic: If True, expects no 'detection_classes' prediction.

    Returns:
      The `EstimatorSpec` produced by the model function.
    """
    model_config = configs['model']
    train_config = configs['train_config']
    with tf.Graph().as_default():
      if mode == tf.estimator.ModeKeys.TRAIN:
        features, labels = inputs.create_train_input_fn(
            configs['train_config'],
            configs['train_input_config'],
            configs['model'])()
        batch_size = train_config.batch_size
      else:
        features, labels = inputs.create_eval_input_fn(
            configs['eval_config'],
            configs['eval_input_config'],
            configs['model'])()
        # Eval input pipeline always produces a batch of one example.
        batch_size = 1
      detection_model_fn = functools.partial(
          model_builder.build, model_config=model_config, is_training=True)
      # Disable checkpoint loading so the test needs no pretrained weights.
      hparams = model_hparams.create_hparams(
          hparams_overrides='load_pretrained=false')
      model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
      estimator_spec = model_fn(features, labels, mode)
      self.assertIsNotNone(estimator_spec.loss)
      self.assertIsNotNone(estimator_spec.predictions)
      if class_agnostic:
        self.assertNotIn('detection_classes', estimator_spec.predictions)
      else:
        detection_classes = estimator_spec.predictions['detection_classes']
        self.assertEqual(batch_size, detection_classes.shape.as_list()[0])
        self.assertEqual(tf.float32, detection_classes.dtype)
      # Boxes/scores/num_detections are produced regardless of agnosticism.
      detection_boxes = estimator_spec.predictions['detection_boxes']
      detection_scores = estimator_spec.predictions['detection_scores']
      num_detections = estimator_spec.predictions['num_detections']
      self.assertEqual(batch_size, detection_boxes.shape.as_list()[0])
      self.assertEqual(tf.float32, detection_boxes.dtype)
      self.assertEqual(batch_size, detection_scores.shape.as_list()[0])
      self.assertEqual(tf.float32, detection_scores.dtype)
      self.assertEqual(tf.float32, num_detections.dtype)
      if mode == tf.estimator.ModeKeys.TRAIN:
        self.assertIsNotNone(estimator_spec.train_op)
      return estimator_spec

  def _assert_outputs_for_predict(self, configs):
    """Builds the model_fn in PREDICT mode and checks serving outputs."""
    model_config = configs['model']
    with tf.Graph().as_default():
      features, _ = inputs.create_eval_input_fn(
          configs['eval_config'],
          configs['eval_input_config'],
          configs['model'])()
      detection_model_fn = functools.partial(
          model_builder.build, model_config=model_config, is_training=False)
      hparams = model_hparams.create_hparams(
          hparams_overrides='load_pretrained=false')
      model_fn = model.create_model_fn(detection_model_fn, configs, hparams)
      estimator_spec = model_fn(features, None, tf.estimator.ModeKeys.PREDICT)
      # PREDICT mode has no loss/train_op, only predictions + export outputs.
      self.assertIsNone(estimator_spec.loss)
      self.assertIsNone(estimator_spec.train_op)
      self.assertIsNotNone(estimator_spec.predictions)
      self.assertIsNotNone(estimator_spec.export_outputs)
      self.assertIn(tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
                    estimator_spec.export_outputs)

  def testModelFnInTrainMode(self):
    """Tests the model function in TRAIN mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.TRAIN)

  def testModelFnInEvalMode(self):
    """Tests the model function in EVAL mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_train_eval(configs, tf.estimator.ModeKeys.EVAL)

  def testModelFnInPredictMode(self):
    """Tests the model function in PREDICT mode."""
    configs = _get_configs_for_model(MODEL_NAME_FOR_TEST)
    self._assert_outputs_for_predict(configs)

  def testExperiment(self):
    """Tests that the `Experiment` object is constructed correctly."""
    experiment = model_test_util.BuildExperiment()
    # The chief writes the as-run pipeline config into the model directory.
    model_dir = experiment.estimator.model_dir
    pipeline_config_path = os.path.join(model_dir, 'pipeline.config')
    self.assertTrue(tf.gfile.Exists(pipeline_config_path))
class UnbatchTensorsTest(tf.test.TestCase):
  """Tests for `model.unstack_batch` with and without unpadding."""

  def test_unbatch_without_unpadding(self):
    # Batch of 2; box dimensions left dynamic so unstacking alone is tested.
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, None, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32,
                                                     [2, None, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, None])
    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder
    }
    unbatched_tensor_dict = model.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=False)
    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32)
          })
    # Without unpadding, each unstacked tensor keeps the full num_boxes=5.
    for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
      self.assertAllEqual(image_out.shape, [4, 4, 3])
    for groundtruth_boxes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_boxes]:
      self.assertAllEqual(groundtruth_boxes_out.shape, [5, 4])
    for groundtruth_classes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_classes]:
      self.assertAllEqual(groundtruth_classes_out.shape, [5, 6])
    for groundtruth_weights_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_weights]:
      self.assertAllEqual(groundtruth_weights_out.shape, [5])

  def test_unbatch_and_unpad_groundtruth_tensors(self):
    # Padded num_boxes is 5; num_groundtruth_boxes says only 3 are real.
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5])
    num_groundtruth_placeholder = tf.placeholder(tf.int32, [2])
    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder,
        fields.InputDataFields.num_groundtruth_boxes:
            num_groundtruth_placeholder
    }
    unbatched_tensor_dict = model.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=True)
    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32),
              num_groundtruth_placeholder:
                  np.array([3, 3], np.int32)
          })
    # Groundtruth tensors are sliced down to 3 boxes; images are untouched.
    for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
      self.assertAllEqual(image_out.shape, [4, 4, 3])
    for groundtruth_boxes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_boxes]:
      self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4])
    for groundtruth_classes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_classes]:
      self.assertAllEqual(groundtruth_classes_out.shape, [3, 6])
    for groundtruth_weights_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_weights]:
      self.assertAllEqual(groundtruth_weights_out.shape, [3])
# Entry point for running this test module directly.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utils for tests for object detection tflearn model."""
from __future__ import absolute_import
import os
import tempfile
import tensorflow as tf
from object_detection import model
from object_detection import model_hparams
FLAGS = tf.flags.FLAGS
# Names of the sample pipeline configs used by the tests.
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
# Source-tree prefix under test_srcdir where object_detection data lives.
PATH_BASE = 'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
  """Returns path to the local pipeline config file."""
  config_filename = model_name + '.config'
  return os.path.join(FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs',
                      config_filename)
def InitializeFlags(model_name_for_test):
  """Points model_dir and pipeline_config_path flags at the test model."""
  # A fresh temp dir keeps each test run's checkpoints isolated.
  FLAGS.model_dir = tempfile.mkdtemp()
  FLAGS.pipeline_config_path = GetPipelineConfigPath(model_name_for_test)
def BuildExperiment():
  """Builds an Experiment object for testing purposes."""
  # Skip pretrained-checkpoint loading; tests have no checkpoint available.
  hparams = model_hparams.create_hparams(
      hparams_overrides='load_pretrained=false')
  # pylint: disable=protected-access
  experiment_fn = model._build_experiment_fn(10, 10)
  # pylint: enable=protected-access
  run_config = tf.contrib.learn.RunConfig()
  return experiment_fn(run_config, hparams)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates and runs `Estimator` for object detection model on TPUs.
This uses the TPUEstimator API to define and run a model in TRAIN/EVAL modes.
"""
# pylint: enable=line-too-long
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from tensorflow.contrib.training.python.training import evaluation
from object_detection import inputs
from object_detection import model
from object_detection import model_hparams
from object_detection.builders import model_builder
from object_detection.utils import config_util
# Hardware selection: set to False to debug the model_fn on plain CPUs.
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')

# Cloud TPU Cluster Resolvers
tf.flags.DEFINE_string(
    'gcp_project',
    default=None,
    help='Project name for the Cloud TPU-enabled project. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_zone',
    default=None,
    help='GCE zone where the Cloud TPU is located in. If not specified, we '
    'will attempt to automatically detect the GCE project from metadata.')
tf.flags.DEFINE_string(
    'tpu_name',
    default=None,
    help='Name of the Cloud TPU for Cluster Resolvers. You must specify either '
    'this flag or --master.')
tf.flags.DEFINE_string(
    'master', default=None,
    help='GRPC URL of the master (e.g. grpc://ip.address.of.tpu:8470). You '
    'must specify either this flag or --tpu_name.')

# TPU sharding and training-loop configuration.
tf.flags.DEFINE_integer('num_shards', 8, 'Number of shards (TPU cores).')
tf.flags.DEFINE_integer('iterations_per_loop', 100,
                        'Number of iterations per TPU training loop.')

# For mode=train_and_eval, evaluation occurs after training is finished.
# Note: independently of steps_per_checkpoint, estimator will save the most
# recent checkpoint every 10 minutes by default for train_and_eval
tf.flags.DEFINE_string('mode', 'train_and_eval',
                       'Mode to run: train, eval, train_and_eval')
# Default assumes 8 cores with a per-core batch of 32.
tf.flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')

# For EVAL.
tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
                        'Minimum seconds between evaluations.')
tf.flags.DEFINE_integer(
    'eval_timeout_secs', None,
    'Maximum seconds between checkpoints before evaluation terminates.')

FLAGS = tf.flags.FLAGS
def create_estimator(run_config,
                     hparams,
                     pipeline_config_path,
                     train_steps=None,
                     eval_steps=None,
                     train_batch_size=None,
                     model_fn_creator=model.create_model_fn,
                     use_tpu=False,
                     num_shards=1,
                     params=None,
                     **kwargs):
  """Builds a `TPUEstimator` together with input functions and step counts.

  Args:
    run_config: A `RunConfig` for the estimator.
    hparams: A `HParams` object carrying configuration overrides.
    pipeline_config_path: Path to a pipeline config proto file.
    train_steps: Number of training steps; when None, taken from the
      `TrainConfig` proto.
    eval_steps: Number of evaluation steps per cycle; when None, taken from
      the `EvalConfig` proto.
    train_batch_size: Training batch size; when None, taken from the
      `TrainConfig` proto.
    model_fn_creator: Callable that builds the estimator `model_fn`.
      Invoked as `model_fn_creator(detection_model_fn, configs, hparams,
      use_tpu)`.
    use_tpu: Whether training and evaluation should run on TPU.
    num_shards: Number of TPU shards (cores).
    params: Optional parameter dictionary forwarded to the estimator.
    **kwargs: Extra keyword configuration overrides.

  Returns:
    A 5-tuple of (estimator, train_input_fn, eval_input_fn, train_steps,
    eval_steps).
  """
  pipeline_configs = config_util.get_configs_from_pipeline_file(
      pipeline_config_path)
  # Fold hparams / explicit args / kwargs into the proto-derived configs.
  pipeline_configs = config_util.merge_external_params_with_configs(
      pipeline_configs,
      hparams,
      train_steps=train_steps,
      eval_steps=eval_steps,
      batch_size=train_batch_size,
      **kwargs)

  model_config = pipeline_configs['model']
  train_config = pipeline_configs['train_config']
  eval_config = pipeline_configs['eval_config']

  params = {} if params is None else params
  if train_steps is None:
    # A zero/unset num_steps in the proto maps to None (no explicit limit).
    train_steps = train_config.num_steps or None
  if eval_steps is None:
    eval_steps = eval_config.num_examples or None

  detection_model_fn = functools.partial(
      model_builder.build, model_config=model_config)

  # Input pipelines for TRAIN and EVAL modes.
  train_input_fn = inputs.create_train_input_fn(
      train_config=train_config,
      train_input_config=pipeline_configs['train_input_config'],
      model_config=model_config)
  eval_input_fn = inputs.create_eval_input_fn(
      eval_config=eval_config,
      eval_input_config=pipeline_configs['eval_input_config'],
      model_config=model_config)

  # For each core, only batch size 1 is supported for eval.
  eval_batch_size = num_shards * 1 if use_tpu else 1
  estimator = tpu_estimator.TPUEstimator(
      model_fn=model_fn_creator(detection_model_fn, pipeline_configs, hparams,
                                use_tpu),
      train_batch_size=train_config.batch_size,
      eval_batch_size=eval_batch_size,
      use_tpu=use_tpu,
      config=run_config,
      params=params)
  return estimator, train_input_fn, eval_input_fn, train_steps, eval_steps
def main(unused_argv):
  """Resolves the TPU master, builds the estimator, and runs the chosen mode.

  Behavior depends on FLAGS.mode: 'train' trains only; 'train_and_eval'
  trains then evaluates once; 'eval' loops, evaluating each new checkpoint
  until the final training step's checkpoint is seen or the iterator times
  out.
  """
  # These flags are defined elsewhere (presumably in the imported `model`
  # module) — marking them required here fails fast on missing values.
  tf.flags.mark_flag_as_required('model_dir')
  tf.flags.mark_flag_as_required('pipeline_config_path')

  # Exactly one way to locate the TPU must be given.
  if FLAGS.master is None and FLAGS.tpu_name is None:
    raise RuntimeError('You must specify either --master or --tpu_name.')

  if FLAGS.master is not None:
    # An explicit master address wins over a named TPU.
    if FLAGS.tpu_name is not None:
      tf.logging.warn('Both --master and --tpu_name are set. Ignoring '
                      '--tpu_name and using --master.')
    tpu_grpc_url = FLAGS.master
  else:
    # Resolve the gRPC URL from the Cloud TPU name/zone/project.
    # NOTE(review): this deep tf.contrib path is TF-1.x specific — confirm it
    # matches the installed TensorFlow version.
    tpu_cluster_resolver = (
        tf.contrib.cluster_resolver.python.training.TPUClusterResolver(
            tpu_names=[FLAGS.tpu_name],
            zone=FLAGS.tpu_zone,
            project=FLAGS.gcp_project))
    tpu_grpc_url = tpu_cluster_resolver.get_master()

  # RunConfig shared by train and eval; both point at the same master.
  config = tpu_config.RunConfig(
      master=tpu_grpc_url,
      evaluation_master=tpu_grpc_url,
      model_dir=FLAGS.model_dir,
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_shards))
  params = {}
  # num_train_steps / num_eval_steps flags are also defined outside this file.
  estimator, train_input_fn, eval_input_fn, train_steps, eval_steps = (
      create_estimator(
          config,
          model_hparams.create_hparams(),
          FLAGS.pipeline_config_path,
          train_steps=FLAGS.num_train_steps,
          eval_steps=FLAGS.num_eval_steps,
          train_batch_size=FLAGS.train_batch_size,
          use_tpu=FLAGS.use_tpu,
          num_shards=FLAGS.num_shards,
          params=params))

  if FLAGS.mode in ['train', 'train_and_eval']:
    estimator.train(input_fn=train_input_fn, max_steps=train_steps)
  if FLAGS.mode == 'train_and_eval':
    # Eval one time.
    eval_results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    tf.logging.info('Eval results: %s' % eval_results)

  # Continuously evaluating.
  if FLAGS.mode == 'eval':

    def terminate_eval():
      # Timeout callback for checkpoints_iterator; returning True stops it.
      tf.logging.info('Terminating eval after %d seconds of no checkpoints' %
                      FLAGS.eval_timeout_secs)
      return True

    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir,
        min_interval_secs=FLAGS.min_eval_interval_secs,
        timeout=FLAGS.eval_timeout_secs,
        timeout_fn=terminate_eval):
      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = estimator.evaluate(
            input_fn=eval_input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        tf.logging.info('Eval results: %s' % eval_results)

        # Terminate eval job when final checkpoint is reached
        # Relies on checkpoint filenames of the form 'model.ckpt-<step>'.
        # NOTE(review): if train_steps is None this comparison is only valid
        # under Python 2 semantics — confirm before porting to Python 3.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % current_step)
          break
      except tf.errors.NotFoundError:
        # The checkpoint may be garbage-collected between discovery and eval.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)


if __name__ == '__main__':
  tf.app.run()
...@@ -15,6 +15,7 @@ py_library( ...@@ -15,6 +15,7 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow/models/research/object_detection/utils:ops",
], ],
) )
...@@ -36,6 +37,7 @@ py_library( ...@@ -36,6 +37,7 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow/models/research/object_detection/utils:test_case",
], ],
) )
...@@ -47,9 +49,10 @@ py_library( ...@@ -47,9 +49,10 @@ py_library(
deps = [ deps = [
":feature_map_generators", ":feature_map_generators",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v2", "//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:inception_v2",
], ],
) )
...@@ -61,9 +64,10 @@ py_library( ...@@ -61,9 +64,10 @@ py_library(
deps = [ deps = [
":feature_map_generators", ":feature_map_generators",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:inception_v3", "//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:inception_v3",
], ],
) )
...@@ -73,9 +77,10 @@ py_library( ...@@ -73,9 +77,10 @@ py_library(
deps = [ deps = [
":feature_map_generators", ":feature_map_generators",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1", "//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:mobilenet_v1",
], ],
) )
...@@ -86,8 +91,40 @@ py_library( ...@@ -86,8 +91,40 @@ py_library(
":feature_map_generators", ":feature_map_generators",
":ssd_mobilenet_v1_feature_extractor", ":ssd_mobilenet_v1_feature_extractor",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:ops", "//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow_models/slim:mobilenet_v1", "//third_party/tensorflow_models/slim:mobilenet_v1",
],
)
py_library(
name = "ssd_resnet_v1_fpn_feature_extractor",
srcs = ["ssd_resnet_v1_fpn_feature_extractor.py"],
deps = [
":feature_map_generators",
"//tensorflow",
"//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow/models/research/object_detection/utils:ops",
"//tensorflow/models/research/object_detection/utils:shape_utils",
"//third_party/tensorflow_models/slim:resnet_v1",
],
)
py_library(
name = "ssd_resnet_v1_fpn_feature_extractor_testbase",
srcs = ["ssd_resnet_v1_fpn_feature_extractor_testbase.py"],
deps = [
"//tensorflow/models/research/object_detection/models:ssd_feature_extractor_test",
],
)
py_test(
name = "ssd_resnet_v1_fpn_feature_extractor_test",
timeout = "long",
srcs = ["ssd_resnet_v1_fpn_feature_extractor_test.py"],
deps = [
":ssd_resnet_v1_fpn_feature_extractor",
":ssd_resnet_v1_fpn_feature_extractor_testbase",
"//tensorflow",
], ],
) )
...@@ -153,8 +190,8 @@ py_library( ...@@ -153,8 +190,8 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:nasnet", "//third_party/tensorflow_models/slim:nasnet",
], ],
) )
...@@ -165,8 +202,8 @@ py_library( ...@@ -165,8 +202,8 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_resnet_v2", "//third_party/tensorflow_models/slim:inception_resnet_v2",
], ],
) )
...@@ -188,8 +225,8 @@ py_library( ...@@ -188,8 +225,8 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:inception_v2", "//third_party/tensorflow_models/slim:inception_v2",
], ],
) )
...@@ -211,9 +248,9 @@ py_library( ...@@ -211,9 +248,9 @@ py_library(
], ],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch", "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/slim:resnet_utils", "//third_party/tensorflow_models/slim:resnet_utils",
"//tensorflow_models/slim:resnet_v1", "//third_party/tensorflow_models/slim:resnet_v1",
], ],
) )
......
...@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -51,7 +51,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models. """MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args: Args:
...@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -66,6 +68,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
Raises: Raises:
ValueError: upon invalid `pad_to_multiple` values. ValueError: upon invalid `pad_to_multiple` values.
...@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -76,7 +81,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__( super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def extract_features(self, preprocessed_inputs): def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs. """Extract features from preprocessed inputs.
...@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -88,13 +94,25 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
Returns: Returns:
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
Raises:
ValueError: if image height or width are not 256 pixels.
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) image_shape = preprocessed_inputs.get_shape()
shape_assert = tf.Assert( image_shape.assert_has_rank(4)
tf.logical_and( image_height = image_shape[1].value
tf.equal(tf.shape(preprocessed_inputs)[1], 256), image_width = image_shape[2].value
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.']) if image_height is None or image_width is None:
shape_assert = tf.Assert(
tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
['image size must be 256 in both height and width.'])
with tf.control_dependencies([shape_assert]):
preprocessed_inputs = tf.identity(preprocessed_inputs)
elif image_height != 256 or image_width != 256:
raise ValueError('image size must be = 256 in both height and width;'
' image dim = %d,%d' % (image_height, image_width))
feature_map_layout = { feature_map_layout = {
'from_layer': [ 'from_layer': [
...@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor( ...@@ -102,10 +120,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
], ],
'layer_depth': [-1, -1, 512, 256, 256], 'layer_depth': [-1, -1, 512, 256, 256],
'conv_kernel_size': [-1, -1, 3, 3, 2], 'conv_kernel_size': [-1, -1, 3, 3, 2],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1', with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
......
...@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test ...@@ -22,7 +22,7 @@ from object_detection.models import ssd_feature_extractor_test
class EmbeddedSSDMobileNetV1FeatureExtractorTest( class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -51,11 +51,23 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256 image_width = 256
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024), expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256), (2, 4, 4, 512), (2, 2, 2, 256),
(4, 1, 1, 256)] (2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(2, 4, 4, 512), (2, 2, 2, 256),
(2, 1, 1, 256)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -63,10 +75,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256 image_width = 256
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 32), (4, 8, 8, 32), (4, 4, 4, 32), expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32), (2, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1( def test_extract_features_returns_correct_shapes_with_pad_to_multiple_of_1(
...@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest( ...@@ -75,11 +87,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
image_width = 256 image_width = 256
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 16, 16, 512), (4, 8, 8, 1024), expected_feature_map_shape = [(2, 16, 16, 512), (2, 8, 8, 1024),
(4, 4, 4, 512), (4, 2, 2, 256), (2, 4, 4, 512), (2, 2, 2, 256),
(4, 1, 1, 256)] (2, 1, 1, 256)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_pad_to_multiple_not_1(self): def test_extract_features_raises_error_with_pad_to_multiple_not_1(self):
......
...@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor( ...@@ -180,7 +180,7 @@ class FasterRCNNInceptionResnetV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
InceptionResnetV2 checkpoints. InceptionResnetV2 checkpoints.
TODO: revisit whether it's possible to force the TODO(jonathanhuang,rathodv): revisit whether it's possible to force the
`Repeat` namescope as created in `_extract_box_classifier_features` to `Repeat` namescope as created in `_extract_box_classifier_features` to
start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can start counting at 2 (e.g. `Repeat_2`) so that the default restore_fn can
be used. be used.
......
...@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor( ...@@ -111,7 +111,8 @@ class FasterRCNNResnetV1FeatureExtractor(
with tf.control_dependencies([shape_assert]): with tf.control_dependencies([shape_assert]):
# Disables batchnorm for fine-tuning with smaller batch sizes. # Disables batchnorm for fine-tuning with smaller batch sizes.
# TODO: Figure out if it is needed when image batch size is bigger. # TODO: Figure out if it is needed when image
# batch size is bigger.
with slim.arg_scope( with slim.arg_scope(
resnet_utils.resnet_arg_scope( resnet_utils.resnet_arg_scope(
batch_norm_epsilon=1e-5, batch_norm_epsilon=1e-5,
......
...@@ -25,6 +25,7 @@ of final feature maps. ...@@ -25,6 +25,7 @@ of final feature maps.
""" """
import collections import collections
import tensorflow as tf import tensorflow as tf
from object_detection.utils import ops
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -115,6 +116,9 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map_keys = [] feature_map_keys = []
feature_maps = [] feature_maps = []
base_from_layer = '' base_from_layer = ''
use_explicit_padding = False
if 'use_explicit_padding' in feature_map_layout:
use_explicit_padding = feature_map_layout['use_explicit_padding']
use_depthwise = False use_depthwise = False
if 'use_depthwise' in feature_map_layout: if 'use_depthwise' in feature_map_layout:
use_depthwise = feature_map_layout['use_depthwise'] use_depthwise = feature_map_layout['use_depthwise']
...@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -139,16 +143,21 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
padding='SAME', padding='SAME',
stride=1, stride=1,
scope=layer_name) scope=layer_name)
stride = 2
layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format( layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
base_from_layer, index, conv_kernel_size, conv_kernel_size, base_from_layer, index, conv_kernel_size, conv_kernel_size,
depth_fn(layer_depth)) depth_fn(layer_depth))
stride = 2
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
intermediate_layer = ops.fixed_padding(
intermediate_layer, conv_kernel_size)
if use_depthwise: if use_depthwise:
feature_map = slim.separable_conv2d( feature_map = slim.separable_conv2d(
intermediate_layer, intermediate_layer,
None, [conv_kernel_size, conv_kernel_size], None, [conv_kernel_size, conv_kernel_size],
depth_multiplier=1, depth_multiplier=1,
padding='SAME', padding=padding,
stride=stride, stride=stride,
scope=layer_name + '_depthwise') scope=layer_name + '_depthwise')
feature_map = slim.conv2d( feature_map = slim.conv2d(
...@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, ...@@ -161,10 +170,56 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
feature_map = slim.conv2d( feature_map = slim.conv2d(
intermediate_layer, intermediate_layer,
depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size], depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
padding='SAME', padding=padding,
stride=stride, stride=stride,
scope=layer_name) scope=layer_name)
feature_map_keys.append(layer_name) feature_map_keys.append(layer_name)
feature_maps.append(feature_map) feature_maps.append(feature_map)
return collections.OrderedDict( return collections.OrderedDict(
[(x, y) for (x, y) in zip(feature_map_keys, feature_maps)]) [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
def fpn_top_down_feature_maps(image_features, depth, scope=None):
"""Generates `top-down` feature maps for Feature Pyramid Networks.
See https://arxiv.org/abs/1612.03144 for details.
Args:
image_features: list of image feature tensors. Spatial resolutions of
succesive tensors must reduce exactly by a factor of 2.
depth: depth of output feature maps.
scope: A scope name to wrap this op under.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
tensors where each tensor has shape [batch, height_i, width_i, depth_i].
"""
with tf.variable_scope(
scope, 'top_down', image_features):
num_levels = len(image_features)
output_feature_maps_list = []
output_feature_map_keys = []
with slim.arg_scope(
[slim.conv2d],
activation_fn=None, normalizer_fn=None, padding='SAME', stride=1):
top_down = slim.conv2d(
image_features[-1],
depth, [1, 1], scope='projection_%d' % num_levels)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append(
'top_down_feature_map_%d' % (num_levels - 1))
for level in reversed(range(num_levels - 1)):
top_down = ops.nearest_neighbor_upsampling(top_down, 2)
residual = slim.conv2d(
image_features[level], depth, [1, 1],
scope='projection_%d' % (level + 1))
top_down = 0.5 * top_down + 0.5 * residual
output_feature_maps_list.append(slim.conv2d(
top_down,
depth, [3, 3],
activation_fn=None,
scope='smoothing_%d' % (level + 1)))
output_feature_map_keys.append('top_down_feature_map_%d' % level)
return collections.OrderedDict(
reversed(zip(output_feature_map_keys, output_feature_maps_list)))
...@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = { ...@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
} }
# TODO(rathodv): add tests with different anchor strides. # TODO: add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self): def test_get_expected_feature_map_shapes_with_inception_v2(self):
...@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase): ...@@ -134,6 +134,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes) self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class FPNFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes(self):
image_features = [
tf.random_uniform([4, 8, 8, 256], dtype=tf.float32),
tf.random_uniform([4, 4, 4, 256], dtype=tf.float32),
tf.random_uniform([4, 2, 2, 256], dtype=tf.float32),
tf.random_uniform([4, 1, 1, 256], dtype=tf.float32),
]
feature_maps = feature_map_generators.fpn_top_down_feature_maps(
image_features=image_features, depth=128)
expected_feature_map_shapes = {
'top_down_feature_map_0': (4, 8, 8, 128),
'top_down_feature_map_1': (4, 4, 4, 128),
'top_down_feature_map_2': (4, 2, 2, 128),
'top_down_feature_map_3': (4, 1, 1, 128)
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = {key: value.shape
for key, value in out_feature_maps.items()}
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase): class GetDepthFunctionTest(tf.test.TestCase):
def test_return_min_depth_when_multiplier_is_small(self): def test_return_min_depth_when_multiplier_is_small(self):
......
...@@ -17,33 +17,14 @@ ...@@ -17,33 +17,14 @@
from abc import abstractmethod from abc import abstractmethod
import itertools
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(object):
def _validate_features_shape(self, class SsdFeatureExtractorTestBase(test_case.TestCase):
feature_extractor,
preprocessed_inputs,
expected_feature_map_shapes):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shape of the extracted features.
"""
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
feature_map_shapes_out = sess.run(feature_map_shapes)
for shape_out, exp_shape_out in zip(
feature_map_shapes_out, expected_feature_map_shapes):
self.assertAllEqual(shape_out, exp_shape_out)
@abstractmethod @abstractmethod
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple): def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
...@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object): ...@@ -59,14 +40,39 @@ class SsdFeatureExtractorTestBase(object):
pass pass
def check_extract_features_returns_correct_shape( def check_extract_features_returns_correct_shape(
self, image_height, image_width, depth_multiplier, pad_to_multiple, self, batch_size, image_height, image_width, depth_multiplier,
expected_feature_map_shapes_out): pad_to_multiple, expected_feature_map_shapes):
feature_extractor = self._create_feature_extractor(depth_multiplier, def graph_fn(image_tensor):
pad_to_multiple) feature_extractor = self._create_feature_extractor(depth_multiplier,
preprocessed_inputs = tf.random_uniform( pad_to_multiple)
[4, image_height, image_width, 3], dtype=tf.float32) feature_maps = feature_extractor.extract_features(image_tensor)
self._validate_features_shape( return feature_maps
feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
image_tensor = np.random.rand(batch_size, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
self, batch_size, image_height, image_width, depth_multiplier,
pad_to_multiple, expected_feature_map_shapes):
def graph_fn(image_height, image_width):
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
image_tensor = tf.random_uniform([batch_size, image_height, image_width,
3], dtype=tf.float32)
feature_maps = feature_extractor.extract_features(image_tensor)
return feature_maps
feature_maps = self.execute_cpu(graph_fn, [
np.array(image_height, dtype=np.int32),
np.array(image_width, dtype=np.int32)
])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shapes):
self.assertAllEqual(feature_map.shape, expected_shape)
def check_extract_features_raises_error_with_invalid_image_size( def check_extract_features_raises_error_with_invalid_image_size(
self, image_height, image_width, depth_multiplier, pad_to_multiple): self, image_height, image_width, depth_multiplier, pad_to_multiple):
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import inception_v2 from nets import inception_v2
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,7 +35,9 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""InceptionV2 Feature Extractor for SSD Models. """InceptionV2 Feature Extractor for SSD Models.
Args: Args:
...@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -49,10 +52,14 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
""" """
super(SSDInceptionV2FeatureExtractor, self).__init__( super(SSDInceptionV2FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -80,32 +87,30 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim(
shape_assert = tf.Assert( 33, preprocessed_inputs)
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = { feature_map_layout = {
'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128], 'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('InceptionV2',
with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base(
_, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c',
final_endpoint='Mixed_5c', min_depth=self._min_depth,
min_depth=self._min_depth, depth_multiplier=self._depth_multiplier,
depth_multiplier=self._depth_multiplier, scope=scope)
scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout,
feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier,
depth_multiplier=self._depth_multiplier, min_depth=self._min_depth,
min_depth=self._min_depth, insert_1x1_conv=True,
insert_1x1_conv=True, image_features=image_features)
image_features=image_features)
return feature_maps.values() return feature_maps.values()
...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor ...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest( class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -49,11 +49,23 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024), expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256), (2, 2, 2, 512), (2, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 576), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self):
...@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -61,11 +73,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 19, 19, 576), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -73,11 +85,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128), expected_feature_map_shape = [(2, 19, 19, 128), (2, 10, 10, 128),
(4, 5, 5, 32), (4, 3, 3, 32), (2, 5, 5, 32), (2, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
...@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest( ...@@ -85,11 +97,11 @@ class SsdInceptionV2FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 32 pad_to_multiple = 32
expected_feature_map_shape = [(4, 20, 20, 576), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 20, 20, 576), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(self):
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import inception_v3 from nets import inception_v3
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,7 +35,9 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""InceptionV3 Feature Extractor for SSD Models. """InceptionV3 Feature Extractor for SSD Models.
Args: Args:
...@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -49,10 +52,14 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
""" """
super(SSDInceptionV3FeatureExtractor, self).__init__( super(SSDInceptionV3FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -80,32 +87,29 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim(
shape_assert = tf.Assert( 33, preprocessed_inputs)
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = { feature_map_layout = {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128], 'layer_depth': [-1, -1, -1, 512, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
with tf.variable_scope('InceptionV3', _, image_features = inception_v3.inception_v3_base(
reuse=self._reuse_weights) as scope: ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
_, image_features = inception_v3.inception_v3_base( final_endpoint='Mixed_7c',
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), min_depth=self._min_depth,
final_endpoint='Mixed_7c', depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth, scope=scope)
depth_multiplier=self._depth_multiplier, feature_maps = feature_map_generators.multi_resolution_feature_maps(
scope=scope) feature_map_layout=feature_map_layout,
feature_maps = feature_map_generators.multi_resolution_feature_maps( depth_multiplier=self._depth_multiplier,
feature_map_layout=feature_map_layout, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, insert_1x1_conv=True,
min_depth=self._min_depth, image_features=image_features)
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values() return feature_maps.values()
...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor ...@@ -22,7 +22,7 @@ from object_detection.models import ssd_inception_v3_feature_extractor
class SsdInceptionV3FeatureExtractorTest( class SsdInceptionV3FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -49,11 +49,23 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 13, 13, 288), (4, 6, 6, 768), expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768),
(4, 2, 2, 2048), (4, 1, 1, 512), (2, 2, 2, 2048), (2, 1, 1, 512),
(4, 1, 1, 256), (4, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 13, 13, 288), (2, 6, 6, 768),
(2, 2, 2, 2048), (2, 1, 1, 512),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self):
...@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -61,11 +73,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 35, 35, 288), (4, 17, 17, 768), expected_feature_map_shape = [(2, 35, 35, 288), (2, 17, 17, 768),
(4, 8, 8, 2048), (4, 4, 4, 512), (2, 8, 8, 2048), (2, 4, 4, 512),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -73,11 +85,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 35, 35, 128), (4, 17, 17, 128), expected_feature_map_shape = [(2, 35, 35, 128), (2, 17, 17, 128),
(4, 8, 8, 192), (4, 4, 4, 32), (2, 8, 8, 192), (2, 4, 4, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
...@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest( ...@@ -85,11 +97,11 @@ class SsdInceptionV3FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 32 pad_to_multiple = 32
expected_feature_map_shape = [(4, 37, 37, 288), (4, 18, 18, 768), expected_feature_map_shape = [(2, 37, 37, 288), (2, 18, 18, 768),
(4, 8, 8, 2048), (4, 4, 4, 512), (2, 8, 8, 2048), (2, 4, 4, 512),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(self):
......
...@@ -20,6 +20,7 @@ import tensorflow as tf ...@@ -20,6 +20,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators from object_detection.models import feature_map_generators
from object_detection.utils import ops from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import mobilenet_v1 from nets import mobilenet_v1
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -35,7 +36,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
pad_to_multiple, pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True, batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
"""MobileNetV1 Feature Extractor for SSD Models. """MobileNetV1 Feature Extractor for SSD Models.
Args: Args:
...@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -50,10 +53,14 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
(e.g. 1), it is desirable to disable batch norm update and use (e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params. pretrained batch norm params.
reuse_weights: Whether to reuse variables. Default is None. reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
""" """
super(SSDMobileNetV1FeatureExtractor, self).__init__( super(SSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple, is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights) conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
"""SSD preprocessing. """SSD preprocessing.
...@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -81,34 +88,33 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_maps: a list of tensors where the ith tensor has shape feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i] [batch, height_i, width_i, depth_i]
""" """
preprocessed_inputs.get_shape().assert_has_rank(4) preprocessed_inputs = shape_utils.check_min_image_dim(
shape_assert = tf.Assert( 33, preprocessed_inputs)
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must at least be 33 in both height and width.'])
feature_map_layout = { feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''], '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128], 'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
} }
with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams): # TODO: Enable fused batch norm once quantization supports it.
with slim.arg_scope([slim.batch_norm], fused=False): with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1', with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope: reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base( _, image_features = mobilenet_v1.mobilenet_v1_base(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
final_endpoint='Conv2d_13_pointwise', final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth, min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
scope=scope) scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout, feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier, depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth, min_depth=self._min_depth,
insert_1x1_conv=True, insert_1x1_conv=True,
image_features=image_features) image_features=image_features)
return feature_maps.values() return feature_maps.values()
...@@ -24,7 +24,7 @@ slim = tf.contrib.slim ...@@ -24,7 +24,7 @@ slim = tf.contrib.slim
class SsdMobilenetV1FeatureExtractorTest( class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase): ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple, def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True): is_training=True, batch_norm_trainable=True):
...@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -52,11 +52,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024), expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256), (2, 2, 2, 512), (2, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)] (2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self): def test_extract_features_returns_correct_shapes_299(self):
...@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -64,11 +64,23 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_with_dynamic_image_shape(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
pad_to_multiple = 1
expected_feature_map_shape = [(2, 8, 8, 512), (2, 4, 4, 1024),
(2, 2, 2, 512), (2, 1, 1, 256),
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self): def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
...@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -76,11 +88,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 0.5**12 depth_multiplier = 0.5**12
pad_to_multiple = 1 pad_to_multiple = 1
expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32), expected_feature_map_shape = [(2, 19, 19, 32), (2, 10, 10, 32),
(4, 5, 5, 32), (4, 3, 3, 32), (2, 5, 5, 32), (2, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)] (2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self): def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
...@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -88,11 +100,11 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 299 image_width = 299
depth_multiplier = 1.0 depth_multiplier = 1.0
pad_to_multiple = 32 pad_to_multiple = 32
expected_feature_map_shape = [(4, 20, 20, 512), (4, 10, 10, 1024), expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256), (2, 5, 5, 512), (2, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)] (2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape( self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, pad_to_multiple, 2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape) expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self): def test_extract_features_raises_error_with_invalid_image_size(self):
...@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest( ...@@ -108,7 +120,7 @@ class SsdMobilenetV1FeatureExtractorTest(
image_width = 128 image_width = 128
depth_multiplier = 1 depth_multiplier = 1
pad_to_multiple = 1 pad_to_multiple = 1
test_image = np.random.rand(4, image_height, image_width, 3) test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier, feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple) pad_to_multiple)
preprocessed_image = feature_extractor.preprocess(test_image) preprocessed_image = feature_extractor.preprocess(test_image)
......
"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1.
See https://arxiv.org/abs/1708.02002 for details.
"""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import ops
from object_detection.utils import shape_utils
from nets import resnet_v1
slim = tf.contrib.slim
class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD FPN feature extractor based on Resnet v1 architecture."""
def __init__(self,
             is_training,
             depth_multiplier,
             min_depth,
             pad_to_multiple,
             conv_hyperparams,
             resnet_base_fn,
             resnet_scope_name,
             fpn_scope_name,
             batch_norm_trainable=True,
             reuse_weights=None,
             use_explicit_padding=False,
             use_depthwise=False):
  """SSD FPN feature extractor based on Resnet v1 architecture.

  Args:
    is_training: whether the network is in training mode.
    depth_multiplier: float depth multiplier for feature extractor.
      UNUSED currently; only 1.0 is accepted.
    min_depth: minimum feature extractor depth. UNUSED currently.
    pad_to_multiple: the nearest multiple to zero pad the input height and
      width dimensions to.
    conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
    resnet_base_fn: base resnet network to use.
    resnet_scope_name: scope name under which to construct resnet.
    fpn_scope_name: scope name under which to construct the feature pyramid
      network.
    batch_norm_trainable: Whether to update batch norm parameters during
      training or not. When training with a small batch size
      (e.g. 1), it is desirable to disable batch norm update and use
      pretrained batch norm params.
    reuse_weights: Whether to reuse variables. Default is None.
    use_explicit_padding: Whether to use explicit padding when extracting
      features. Default is False. UNUSED currently; must remain False.
    use_depthwise: Whether to use depthwise convolutions. UNUSED currently.

  Raises:
    ValueError: On supplying invalid arguments for unused arguments.
  """
  # Forward use_depthwise to the base class as well, so the stored flag
  # reflects the caller's argument — consistent with the other SSD feature
  # extractors (InceptionV2/V3, MobileNetV1) in this package.
  super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
      is_training, depth_multiplier, min_depth, pad_to_multiple,
      conv_hyperparams, batch_norm_trainable, reuse_weights,
      use_explicit_padding, use_depthwise)
  if self._depth_multiplier != 1.0:
    raise ValueError('Only depth 1.0 is supported, found: {}'.
                     format(self._depth_multiplier))
  # Truthiness check instead of `is True`: any truthy value is invalid,
  # since explicit padding is unsupported by this extractor.
  if self._use_explicit_padding:
    raise ValueError('Explicit padding is not a valid option.')
  self._resnet_base_fn = resnet_base_fn
  self._resnet_scope_name = resnet_scope_name
  self._fpn_scope_name = fpn_scope_name
def preprocess(self, resized_inputs):
"""SSD preprocessing.
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
channel_means = [123.68, 116.779, 103.939]
return resized_inputs - [[channel_means]]
def _filter_features(self, image_features):
# TODO: Change resnet endpoint to strip scope prefixes instead
# of munging the scope here.
filtered_image_features = dict({})
for key, feature in image_features.items():
feature_name = key.split('/')[-1]
if feature_name in ['block2', 'block3', 'block4']:
filtered_image_features[feature_name] = feature
return filtered_image_features
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Runs the resnet base network, keeps its block2/block3/block4 endpoints,
    appends two extra stride-2 conv layers on top of block4, and fuses all
    five maps with a top-down feature pyramid network.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    # Reject inputs smaller than 129x129; presumably the minimum size for
    # which all resnet/FPN strides produce valid feature maps — TODO confirm.
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        # num_classes=None and global_pool=False return the raw endpoint
        # dict instead of classification logits.
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=self._is_training and self._batch_norm_trainable,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            scope=scope)
      # Strip scope prefixes and drop endpoints the FPN does not use.
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams):
        # Two additional coarse "bottom-up" levels (block5, block6 scopes),
        # each halving spatial resolution with a stride-2 3x3 conv.
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        # Fuse the five levels (finest to coarsest) with top-down FPN
        # connections; every output map has depth 256.
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [
                image_features[key] for key in
                ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
            ],
            depth=256,
            scope='top_down_features')
    return feature_maps.values()
class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor using a Resnet-50 v1 base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet50 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the generic FPN extractor to the Resnet-50 v1 base network and
    # its canonical variable scope names.
    super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        resnet_base_fn=resnet_v1.resnet_v1_50,
        resnet_scope_name='resnet_v1_50',
        fpn_scope_name='fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor using a Resnet-101 v1 base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet101 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the generic FPN extractor to the Resnet-101 v1 base network and
    # its canonical variable scope names.
    super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        resnet_base_fn=resnet_v1.resnet_v1_101,
        resnet_scope_name='resnet_v1_101',
        fpn_scope_name='fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
  """SSD FPN feature extractor using a Resnet-152 v1 base network."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False):
    """Resnet152 v1 FPN Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
      batch_norm_trainable: Whether to update batch norm parameters during
        training or not. When training with a small batch size
        (e.g. 1), it is desirable to disable batch norm update and use
        pretrained batch norm params.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
    """
    # Bind the generic FPN extractor to the Resnet-152 v1 base network and
    # its canonical variable scope names.
    super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        resnet_base_fn=resnet_v1.resnet_v1_152,
        resnet_scope_name='resnet_v1_152',
        fpn_scope_name='fpn',
        batch_norm_trainable=batch_norm_trainable,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment