Unverified Commit 3f78f4cf authored by derekjchow, committed by GitHub

Merge pull request #3494 from pkulzc/master

Update object detection with internal changes and remove unused BUILD files.
parents 73748d01 0319908c
@@ -61,12 +61,12 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
     return [1]

   def _generate(self, feature_map_shape_list, im_height, im_width):
-    return box_list.BoxList(
+    return [box_list.BoxList(
         tf.constant([[0, 0, .5, .5],
                      [0, .5, .5, 1],
                      [.5, 0, 1, .5],
                      [1., 1., 1.5, 1.5]  # Anchor that is outside clip_window.
-                    ], tf.float32))
+                    ], tf.float32))]

   def num_anchors(self):
     return 4
@@ -74,7 +74,8 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
 class SsdMetaArchTest(test_case.TestCase):

-  def _create_model(self, apply_hard_mining=True):
+  def _create_model(self, apply_hard_mining=True,
+                    normalize_loc_loss_by_codesize=False):
     is_training = False
     num_classes = 1
     mock_anchor_generator = MockAnchorGenerator2x2()
@@ -98,6 +99,7 @@ class SsdMetaArchTest(test_case.TestCase):
         max_total_size=5)
     classification_loss_weight = 1.0
     localization_loss_weight = 1.0
+    negative_class_weight = 1.0
     normalize_loss_by_num_matches = False
     hard_example_miner = None
@@ -111,10 +113,11 @@ class SsdMetaArchTest(test_case.TestCase):
     model = ssd_meta_arch.SSDMetaArch(
         is_training, mock_anchor_generator, mock_box_predictor, mock_box_coder,
         fake_feature_extractor, mock_matcher, region_similarity_calculator,
-        encode_background_as_zeros, image_resizer_fn, non_max_suppression_fn,
-        tf.identity, classification_loss, localization_loss,
-        classification_loss_weight, localization_loss_weight,
-        normalize_loss_by_num_matches, hard_example_miner, add_summaries=False)
+        encode_background_as_zeros, negative_class_weight, image_resizer_fn,
+        non_max_suppression_fn, tf.identity, classification_loss,
+        localization_loss, classification_loss_weight, localization_loss_weight,
+        normalize_loss_by_num_matches, hard_example_miner, add_summaries=False,
+        normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize)
     return model, num_classes, mock_anchor_generator.num_anchors(), code_size
   def test_preprocess_preserves_shapes_with_dynamic_input_image(self):
@@ -287,6 +290,37 @@ class SsdMetaArchTest(test_case.TestCase):
       self.assertAllClose(localization_loss, expected_localization_loss)
       self.assertAllClose(classification_loss, expected_classification_loss)
+  def test_loss_results_are_correct_with_normalize_by_codesize_true(self):
+    with tf.Graph().as_default():
+      _, _, _, _ = self._create_model()
+    def graph_fn(preprocessed_tensor, groundtruth_boxes1, groundtruth_boxes2,
+                 groundtruth_classes1, groundtruth_classes2):
+      groundtruth_boxes_list = [groundtruth_boxes1, groundtruth_boxes2]
+      groundtruth_classes_list = [groundtruth_classes1, groundtruth_classes2]
+      model, _, _, _ = self._create_model(apply_hard_mining=False,
+                                          normalize_loc_loss_by_codesize=True)
+      model.provide_groundtruth(groundtruth_boxes_list,
+                                groundtruth_classes_list)
+      prediction_dict = model.predict(preprocessed_tensor,
+                                      true_image_shapes=None)
+      loss_dict = model.loss(prediction_dict, true_image_shapes=None)
+      return (loss_dict['localization_loss'],)
+
+    batch_size = 2
+    preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
+    groundtruth_boxes1 = np.array([[0, 0, 1, 1]], dtype=np.float32)
+    groundtruth_boxes2 = np.array([[0, 0, 1, 1]], dtype=np.float32)
+    groundtruth_classes1 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes2 = np.array([[1]], dtype=np.float32)
+    # Unnormalized localization loss is 0.5 (as asserted in the preceding
+    # test); normalize_loc_loss_by_codesize divides it by the code size of 4.
+    expected_localization_loss = 0.5 / 4
+    localization_loss = self.execute(graph_fn, [preprocessed_input,
+                                                groundtruth_boxes1,
+                                                groundtruth_boxes2,
+                                                groundtruth_classes1,
+                                                groundtruth_classes2])
+    self.assertAllClose(localization_loss, expected_localization_loss)
   def test_loss_results_are_correct_with_hard_example_mining(self):
     with tf.Graph().as_default():
......
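A note for readers tracing the arithmetic in the new codesize test: below is a minimal plain-Python sketch (values mirror the test, not a real model run) of what normalize_loc_loss_by_codesize does, assuming the 0.5 summed localization loss from the matching un-normalized test and the 4-element box codes returned by the mock box coder.

unnormalized_loc_loss = 0.5   # value asserted by the un-normalized test above
code_size = 4                 # length of each encoded box, e.g. [ty, tx, th, tw]
normalized = unnormalized_loc_loss / code_size
assert normalized == 0.125    # matches expected_localization_loss = 0.5 / 4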
# Tensorflow Object Detection API: main runnables.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

py_library(
    name = "coco_tools",
    srcs = [
        "coco_tools.py",
    ],
    deps = [
        "//file/localfile",
        "//file/placer",
        "//pycocotools",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:json_utils",
    ],
)

py_test(
    name = "coco_tools_test",
    srcs = [
        "coco_tools_test.py",
    ],
    deps = [
        ":coco_tools",
        "//testing/pybase",
        "//numpy",
    ],
)

py_library(
    name = "coco_evaluation",
    srcs = [
        "coco_evaluation.py",
    ],
    deps = [
        ":coco_tools",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:standard_fields",
        "//tensorflow/models/research/object_detection/utils:object_detection_evaluation",
    ],
)

py_test(
    name = "coco_evaluation_test",
    srcs = [
        "coco_evaluation_test.py",
    ],
    deps = [
        ":coco_evaluation",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:standard_fields",
    ],
)

py_binary(
    name = "offline_eval_map_corloc",
    srcs = [
        "offline_eval_map_corloc.py",
    ],
    deps = [
        ":tf_example_parser",
        "//tensorflow/models/research/object_detection:evaluator",
        "//tensorflow/models/research/object_detection/builders:input_reader_builder",
        "//tensorflow/models/research/object_detection/core:standard_fields",
        "//tensorflow/models/research/object_detection/utils:config_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)

py_test(
    name = "offline_eval_map_corloc_test",
    srcs = [
        "offline_eval_map_corloc_test.py",
    ],
    deps = [
        ":offline_eval_map_corloc",
        "//tensorflow",
    ],
)

py_library(
    name = "tf_example_parser",
    srcs = ["tf_example_parser.py"],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:data_parser",
        "//tensorflow/models/research/object_detection/core:standard_fields",
    ],
)

py_test(
    name = "tf_example_parser_test",
    srcs = ["tf_example_parser_test.py"],
    deps = [
        ":tf_example_parser",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:standard_fields",
    ],
)
@@ -74,23 +74,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
           [ymin, xmin, ymax, xmax] in absolute image coordinates.
         InputDataFields.groundtruth_classes: integer numpy array of shape
           [num_boxes] containing 1-indexed groundtruth classes for the boxes.
+        InputDataFields.groundtruth_is_crowd (optional): integer numpy array of
+          shape [num_boxes] containing iscrowd flag for groundtruth boxes.
     """
     if image_id in self._image_ids:
       tf.logging.warning('Ignoring ground truth with image id %s since it was '
                          'previously added', image_id)
       return
+    groundtruth_is_crowd = groundtruth_dict.get(
+        standard_fields.InputDataFields.groundtruth_is_crowd)
+    # Drop groundtruth_is_crowd if empty tensor.
+    if groundtruth_is_crowd is not None and not groundtruth_is_crowd.shape[0]:
+      groundtruth_is_crowd = None
     self._groundtruth_list.extend(
-        coco_tools.
-        ExportSingleImageGroundtruthToCoco(
+        coco_tools.ExportSingleImageGroundtruthToCoco(
             image_id=image_id,
             next_annotation_id=self._annotation_id,
             category_id_set=self._category_id_set,
-            groundtruth_boxes=groundtruth_dict[standard_fields.InputDataFields.
-                                               groundtruth_boxes],
-            groundtruth_classes=groundtruth_dict[standard_fields.
-                                                 InputDataFields.
-                                                 groundtruth_classes]))
+            groundtruth_boxes=groundtruth_dict[
+                standard_fields.InputDataFields.groundtruth_boxes],
+            groundtruth_classes=groundtruth_dict[
+                standard_fields.InputDataFields.groundtruth_classes],
+            groundtruth_is_crowd=groundtruth_is_crowd))
     self._annotation_id += groundtruth_dict[standard_fields.InputDataFields.
                                             groundtruth_boxes].shape[0]
     self._image_ids[image_id] = False
......
@@ -86,6 +86,78 @@ class CocoDetectionEvaluationTest(tf.test.TestCase):
     metrics = coco_evaluator.evaluate()
     self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self):
+    """Tests computing mAP with is_crowd GT boxes skipped."""
+    category_list = [{
+        'id': 0,
+        'name': 'person'
+    }, {
+        'id': 1,
+        'name': 'cat'
+    }, {
+        'id': 2,
+        'name': 'dog'
+    }]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator.add_single_ground_truth_image_info(
+        image_id='image1',
+        groundtruth_dict={
+            standard_fields.InputDataFields.groundtruth_boxes:
+                np.array([[100., 100., 200., 200.], [99., 99., 200., 200.]]),
+            standard_fields.InputDataFields.groundtruth_classes:
+                np.array([1, 2]),
+            standard_fields.InputDataFields.groundtruth_is_crowd:
+                np.array([0, 1])
+        })
+    coco_evaluator.add_single_detected_image_info(
+        image_id='image1',
+        detections_dict={
+            standard_fields.DetectionResultFields.detection_boxes:
+                np.array([[100., 100., 200., 200.]]),
+            standard_fields.DetectionResultFields.detection_scores:
+                np.array([.8]),
+            standard_fields.DetectionResultFields.detection_classes:
+                np.array([1])
+        })
+    metrics = coco_evaluator.evaluate()
+    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+
+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsEmptyCrowd(self):
+    """Tests computing mAP with empty is_crowd array passed in."""
+    category_list = [{
+        'id': 0,
+        'name': 'person'
+    }, {
+        'id': 1,
+        'name': 'cat'
+    }, {
+        'id': 2,
+        'name': 'dog'
+    }]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    coco_evaluator.add_single_ground_truth_image_info(
+        image_id='image1',
+        groundtruth_dict={
+            standard_fields.InputDataFields.groundtruth_boxes:
+                np.array([[100., 100., 200., 200.]]),
+            standard_fields.InputDataFields.groundtruth_classes:
+                np.array([1]),
+            standard_fields.InputDataFields.groundtruth_is_crowd:
+                np.array([])
+        })
+    coco_evaluator.add_single_detected_image_info(
+        image_id='image1',
+        detections_dict={
+            standard_fields.DetectionResultFields.detection_boxes:
+                np.array([[100., 100., 200., 200.]]),
+            standard_fields.DetectionResultFields.detection_scores:
+                np.array([.8]),
+            standard_fields.DetectionResultFields.detection_classes:
+                np.array([1])
+        })
+    metrics = coco_evaluator.evaluate()
+    self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
   def testRejectionOnDuplicateGroundtruth(self):
     """Tests that groundtruth cannot be added more than once for an image."""
     categories = [{'id': 1, 'name': 'cat'},
......
@@ -17,7 +17,7 @@
 Note that nothing in this file is tensorflow related and thus cannot
 be called directly as a slim metric, for example.

-TODO: wrap as a slim metric in metrics.py
+TODO(jonathanhuang): wrap as a slim metric in metrics.py

 Usage example: given a set of images with ids in the list image_ids
@@ -327,7 +327,8 @@ def ExportSingleImageGroundtruthToCoco(image_id,
                                        category_id_set,
                                        groundtruth_boxes,
                                        groundtruth_classes,
-                                       groundtruth_masks=None):
+                                       groundtruth_masks=None,
+                                       groundtruth_is_crowd=None):
   """Export groundtruth of a single image to COCO format.

   This function converts groundtruth detection annotations represented as numpy
@@ -338,8 +339,7 @@ def ExportSingleImageGroundtruthToCoco(image_id,
   groundtruth_classes[i] are associated with the same groundtruth annotation.

   In the exported result, "area" fields are always set to the area of the
-  groundtruth bounding box and "iscrowd" fields are always set to 0.
-  TODO: pass in "iscrowd" array for evaluating on COCO dataset.
+  groundtruth bounding box.

   Args:
     image_id: a unique image identifier either of type integer or string.
@@ -352,6 +352,8 @@ def ExportSingleImageGroundtruthToCoco(image_id,
     groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
     groundtruth_masks: optional uint8 numpy array of shape [num_detections,
       image_height, image_width] containing detection_masks.
+    groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes]
+      indicating whether groundtruth boxes are crowd.

   Returns:
     a list of groundtruth annotations for a single image in the COCO format.
@@ -379,17 +381,27 @@ def ExportSingleImageGroundtruthToCoco(image_id,
                      'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
                          groundtruth_classes.shape[0],
                          groundtruth_boxes.shape[0], image_id))
+  has_is_crowd = groundtruth_is_crowd is not None
+  if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
+    raise ValueError('groundtruth_is_crowd is expected to be of rank 1.')
   groundtruth_list = []
   for i in range(num_boxes):
     if groundtruth_classes[i] in category_id_set:
+      iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
       export_dict = {
-          'id': next_annotation_id + i,
-          'image_id': image_id,
-          'category_id': int(groundtruth_classes[i]),
-          'bbox': list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
-          'area': float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
-                        (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])),
-          'iscrowd': 0
+          'id':
+              next_annotation_id + i,
+          'image_id':
+              image_id,
+          'category_id':
+              int(groundtruth_classes[i]),
+          'bbox':
+              list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
+          'area':
+              float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
+                    (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])),
+          'iscrowd':
+              iscrowd
       }
       if groundtruth_masks is not None:
         export_dict['segmentation'] = _RleCompress(groundtruth_masks[i])
@@ -416,7 +428,7 @@ def ExportGroundtruthToCOCO(image_ids,
   In the exported result, "area" fields are always set to the area of the
   groundtruth bounding box and "iscrowd" fields are always set to 0.
-  TODO: pass in "iscrowd" array for evaluating on COCO dataset.
+  TODO(jonathanhuang): pass in "iscrowd" array for evaluating on COCO dataset.

   Args:
     image_ids: a list of unique image identifier either of type integer or
......
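A minimal usage sketch of the extended export signature, assuming the module is importable as object_detection.metrics.coco_tools as in this PR's tree; the boxes and ids are illustrative only.

import numpy as np
from object_detection.metrics import coco_tools  # path assumed from this PR

# One crowd box and one normal box for a single image.
annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
    image_id='image1',
    next_annotation_id=1,
    category_id_set={1, 2},
    groundtruth_boxes=np.array([[10., 10., 50., 60.], [0., 0., 20., 20.]],
                               dtype=np.float32),
    groundtruth_classes=np.array([1, 2], dtype=np.int32),
    groundtruth_is_crowd=np.array([1, 0], dtype=np.int32))
print([a['iscrowd'] for a in annotations])  # [1, 0]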
@@ -248,7 +248,11 @@ class CocoToolsTest(tf.test.TestCase):
                       [0, 0, .5, .5],
                       [.5, .5, .5, .5]], dtype=np.float32)
     classes = np.array([1, 2, 3], dtype=np.int32)
+    is_crowd = np.array([0, 1, 0], dtype=np.int32)
     next_annotation_id = 1
+    expected_counts = ['04', '31', '4']
+    # Tests exporting without passing in is_crowd (for backward compatibility).
     coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
         image_id='first_image',
         category_id_set=set([1, 2, 3]),
@@ -256,7 +260,6 @@ class CocoToolsTest(tf.test.TestCase):
         groundtruth_boxes=boxes,
         groundtruth_classes=classes,
         groundtruth_masks=masks)
-    expected_counts = ['04', '31', '4']
     for i, annotation in enumerate(coco_annotations):
       self.assertEqual(annotation['segmentation']['counts'],
                        expected_counts[i])
@@ -267,6 +270,26 @@ class CocoToolsTest(tf.test.TestCase):
       self.assertEqual(annotation['category_id'], classes[i])
       self.assertEqual(annotation['id'], i + next_annotation_id)
+    # Tests exporting with is_crowd.
+    coco_annotations = coco_tools.ExportSingleImageGroundtruthToCoco(
+        image_id='first_image',
+        category_id_set=set([1, 2, 3]),
+        next_annotation_id=next_annotation_id,
+        groundtruth_boxes=boxes,
+        groundtruth_classes=classes,
+        groundtruth_masks=masks,
+        groundtruth_is_crowd=is_crowd)
+    for i, annotation in enumerate(coco_annotations):
+      self.assertEqual(annotation['segmentation']['counts'],
+                       expected_counts[i])
+      self.assertTrue(np.all(np.equal(mask.decode(
+          annotation['segmentation']), masks[i])))
+      self.assertTrue(np.all(np.isclose(annotation['bbox'], coco_boxes[i])))
+      self.assertEqual(annotation['image_id'], 'first_image')
+      self.assertEqual(annotation['category_id'], classes[i])
+      self.assertEqual(annotation['iscrowd'], is_crowd[i])
+      self.assertEqual(annotation['id'], i + next_annotation_id)
 if __name__ == '__main__':
   tf.test.main()
@@ -32,6 +32,7 @@ import tensorflow as tf
 from google.protobuf import text_format
 from tensorflow.contrib.learn.python.learn import learn_runner
 from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
+from tensorflow.python.lib.io import file_io
 from object_detection import eval_util
 from object_detection import inputs
 from object_detection import model_hparams
@@ -54,6 +55,20 @@ tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of train steps.')
 FLAGS = tf.flags.FLAGS

+# A map of names to methods that help build the model.
+MODEL_BUILD_UTIL_MAP = {
+    'get_configs_from_pipeline_file':
+        config_util.get_configs_from_pipeline_file,
+    'create_pipeline_proto_from_configs':
+        config_util.create_pipeline_proto_from_configs,
+    'merge_external_params_with_configs':
+        config_util.merge_external_params_with_configs,
+    'create_train_input_fn': inputs.create_train_input_fn,
+    'create_eval_input_fn': inputs.create_eval_input_fn,
+    'create_predict_input_fn': inputs.create_predict_input_fn,
+}
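The point of this map is indirection: tests or internal forks can swap one of these builders without patching call sites, since populate_experiment below resolves the functions through the map rather than calling config_util/inputs directly. A hedged sketch of such an override; the module path and my_create_train_input_fn are assumptions, not part of the diff.

# Assumes this file is importable as object_detection.model (path not shown).
from object_detection import model as model_lib

def my_create_train_input_fn(train_config, train_input_config, model_config):
  """Hypothetical replacement input-fn builder with the same signature."""
  def _input_fn(params=None):
    del params  # unused in this sketch
    raise NotImplementedError('build a tf.data pipeline here')
  return _input_fn

# Subsequent populate_experiment() calls pick up the override via the map.
model_lib.MODEL_BUILD_UTIL_MAP['create_train_input_fn'] = (
    my_create_train_input_fn)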
 def _get_groundtruth_data(detection_model, class_agnostic):
   """Extracts groundtruth data from detection_model.
@@ -319,9 +334,11 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
       else:
         category_index = label_map_util.create_category_index_from_labelmap(
             eval_input_config.label_map_path)
-      detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
-          eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
       if not use_tpu:
+        detection_and_groundtruth = (
+            vis_utils.draw_side_by_side_evaluation_image(
+                eval_dict, category_index, max_boxes_to_draw=20,
+                min_score_thresh=0.2))
         tf.summary.image('Detections_Left_Groundtruth_Right',
                          detection_and_groundtruth)
@@ -411,8 +428,18 @@ def populate_experiment(run_config,
     An `Experiment` that defines all aspects of training, evaluation, and
     export.
   """
-  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
-  configs = config_util.merge_external_params_with_configs(
+  get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
+      'get_configs_from_pipeline_file']
+  create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
+      'create_pipeline_proto_from_configs']
+  merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
+      'merge_external_params_with_configs']
+  create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn']
+  create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
+  create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
+
+  configs = get_configs_from_pipeline_file(pipeline_config_path)
+  configs = merge_external_params_with_configs(
       configs,
       hparams,
       train_steps=train_steps,
@@ -424,28 +451,28 @@ def populate_experiment(run_config,
   eval_config = configs['eval_config']
   eval_input_config = configs['eval_input_config']

-  if train_steps is None:
-    train_steps = train_config.num_steps if train_config.num_steps else None
+  if train_steps is None and train_config.num_steps:
+    train_steps = train_config.num_steps

-  if eval_steps is None:
-    eval_steps = eval_config.num_examples if eval_config.num_examples else None
+  if eval_steps is None and eval_config.num_examples:
+    eval_steps = eval_config.num_examples

   detection_model_fn = functools.partial(
       model_builder.build, model_config=model_config)

   # Create the input functions for TRAIN/EVAL.
-  train_input_fn = inputs.create_train_input_fn(
+  train_input_fn = create_train_input_fn(
       train_config=train_config,
       train_input_config=train_input_config,
       model_config=model_config)
-  eval_input_fn = inputs.create_eval_input_fn(
+  eval_input_fn = create_eval_input_fn(
       eval_config=eval_config,
       eval_input_config=eval_input_config,
       model_config=model_config)

   export_strategies = [
       tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
-          serving_input_fn=inputs.create_predict_input_fn(
+          serving_input_fn=create_predict_input_fn(
               model_config=model_config))
   ]
@@ -455,8 +482,10 @@ def populate_experiment(run_config,
   if run_config.is_chief:
     # Store the final pipeline config for traceability.
-    pipeline_config_final = config_util.create_pipeline_proto_from_configs(
+    pipeline_config_final = create_pipeline_proto_from_configs(
         configs)
+    if not file_io.file_exists(estimator.model_dir):
+      file_io.recursive_create_dir(estimator.model_dir)
     pipeline_config_final_path = os.path.join(estimator.model_dir,
                                               'pipeline.config')
     config_text = text_format.MessageToString(pipeline_config_final)
......
@@ -77,6 +77,10 @@ tf.flags.DEFINE_integer('min_eval_interval_secs', 180,
 tf.flags.DEFINE_integer(
     'eval_timeout_secs', None,
     'Maximum seconds between checkpoints before evaluation terminates.')
+tf.flags.DEFINE_string('hparams_overrides', None, 'Comma-separated list of '
+                       'hyperparameters to override defaults.')
+tf.flags.DEFINE_boolean('eval_training_data', False,
+                        'If training data should be evaluated for this job.')

 FLAGS = tf.flags.FLAGS
@@ -122,7 +126,10 @@ def create_estimator(run_config,
   Returns:
     Estimator: A estimator object used for training and evaluation
     train_input_fn: Input function for the training loop
-    eval_input_fn: Input function for the evaluation run
+    eval_validation_input_fn: Input function to run for evaluation on
+      validation data.
+    eval_training_input_fn: Input function to run for evaluation on
+      training data.
     train_steps: Number of training steps either from arg `train_steps` or
       `TrainConfig` proto
     eval_steps: Number of evaluation steps either from arg `eval_steps` or
@@ -141,15 +148,17 @@ def create_estimator(run_config,
   train_input_config = configs['train_input_config']
   eval_config = configs['eval_config']
   eval_input_config = configs['eval_input_config']
+  if FLAGS.eval_training_data:
+    eval_input_config = configs['train_input_config']

   if params is None:
     params = {}

-  if train_steps is None:
-    train_steps = train_config.num_steps if train_config.num_steps else None
+  if train_steps is None and train_config.num_steps:
+    train_steps = train_config.num_steps

-  if eval_steps is None:
-    eval_steps = eval_config.num_examples if eval_config.num_examples else None
+  if eval_steps is None and eval_config.num_examples:
+    eval_steps = eval_config.num_examples

   detection_model_fn = functools.partial(
       model_builder.build, model_config=model_config)
@@ -159,10 +168,14 @@ def create_estimator(run_config,
       train_config=train_config,
       train_input_config=train_input_config,
       model_config=model_config)
-  eval_input_fn = inputs.create_eval_input_fn(
+  eval_validation_input_fn = inputs.create_eval_input_fn(
       eval_config=eval_config,
       eval_input_config=eval_input_config,
       model_config=model_config)
+  eval_training_input_fn = inputs.create_eval_input_fn(
+      eval_config=eval_config,
+      eval_input_config=train_input_config,
+      model_config=model_config)

   estimator = tpu_estimator.TPUEstimator(
       model_fn=model_fn_creator(detection_model_fn, configs, hparams,
@@ -173,7 +186,8 @@ def create_estimator(run_config,
       use_tpu=use_tpu,
       config=run_config,
       params=params)
-  return estimator, train_input_fn, eval_input_fn, train_steps, eval_steps
+  return (estimator, train_input_fn, eval_validation_input_fn,
+          eval_training_input_fn, train_steps, eval_steps)

 def main(unused_argv):
@@ -204,24 +218,27 @@ def main(unused_argv):
           iterations_per_loop=FLAGS.iterations_per_loop,
           num_shards=FLAGS.num_shards))
   params = {}
-  estimator, train_input_fn, eval_input_fn, train_steps, eval_steps = (
-      create_estimator(
-          config,
-          model_hparams.create_hparams(),
-          FLAGS.pipeline_config_path,
-          train_steps=FLAGS.num_train_steps,
-          eval_steps=FLAGS.num_eval_steps,
-          train_batch_size=FLAGS.train_batch_size,
-          use_tpu=FLAGS.use_tpu,
-          num_shards=FLAGS.num_shards,
-          params=params))
+  (estimator, train_input_fn, eval_validation_input_fn, eval_training_input_fn,
+   train_steps, eval_steps) = (
+       create_estimator(
+           config,
+           model_hparams.create_hparams(
+               hparams_overrides=FLAGS.hparams_overrides),
+           FLAGS.pipeline_config_path,
+           train_steps=FLAGS.num_train_steps,
+           eval_steps=FLAGS.num_eval_steps,
+           train_batch_size=FLAGS.train_batch_size,
+           use_tpu=FLAGS.use_tpu,
+           num_shards=FLAGS.num_shards,
+           params=params))
   if FLAGS.mode in ['train', 'train_and_eval']:
     estimator.train(input_fn=train_input_fn, max_steps=train_steps)
   if FLAGS.mode == 'train_and_eval':
     # Eval one time.
-    eval_results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
+    eval_results = estimator.evaluate(
+        input_fn=eval_validation_input_fn, steps=eval_steps)
     tf.logging.info('Eval results: %s' % eval_results)

   # Continuously evaluating.
@@ -239,11 +256,18 @@ def main(unused_argv):
         timeout_fn=terminate_eval):

       tf.logging.info('Starting to evaluate.')
+      if FLAGS.eval_training_data:
+        name = 'training_data'
+        input_fn = eval_training_input_fn
+      else:
+        name = 'validation_data'
+        input_fn = eval_validation_input_fn
       try:
         eval_results = estimator.evaluate(
-            input_fn=eval_input_fn,
+            input_fn=input_fn,
             steps=eval_steps,
-            checkpoint_path=ckpt)
+            checkpoint_path=ckpt,
+            name=name)
         tf.logging.info('Eval results: %s' % eval_results)

         # Terminate eval job when final checkpoint is reached
......
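The new --hparams_overrides flag is forwarded verbatim to model_hparams.create_hparams; per the flag's help string it is a comma-separated list of name=value overrides (HParams.parse syntax). A small sketch follows; 'load_pretrained' is an assumed example hparam name, not something this diff defines.

from object_detection import model_hparams  # as imported above

# Illustrative override string; any hparam the module defines can be
# overridden the same way, comma-separated (e.g. 'a=1,b=false').
hparams = model_hparams.create_hparams(
    hparams_overrides='load_pretrained=false')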
# Tensorflow Object Detection API: Models.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

py_library(
    name = "feature_map_generators",
    srcs = [
        "feature_map_generators.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:ops",
    ],
)

py_test(
    name = "feature_map_generators_test",
    srcs = [
        "feature_map_generators_test.py",
    ],
    deps = [
        ":feature_map_generators",
        "//tensorflow",
    ],
)

py_library(
    name = "ssd_feature_extractor_test",
    srcs = [
        "ssd_feature_extractor_test.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:test_case",
    ],
)

py_library(
    name = "ssd_inception_v2_feature_extractor",
    srcs = [
        "ssd_inception_v2_feature_extractor.py",
    ],
    deps = [
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow/models/research/object_detection/utils:ops",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
        "//third_party/tensorflow_models/slim:inception_v2",
    ],
)

py_library(
    name = "ssd_inception_v3_feature_extractor",
    srcs = [
        "ssd_inception_v3_feature_extractor.py",
    ],
    deps = [
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow/models/research/object_detection/utils:ops",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
        "//third_party/tensorflow_models/slim:inception_v3",
    ],
)

py_library(
    name = "ssd_mobilenet_v1_feature_extractor",
    srcs = ["ssd_mobilenet_v1_feature_extractor.py"],
    deps = [
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow/models/research/object_detection/utils:ops",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
        "//third_party/tensorflow_models/slim:mobilenet_v1",
    ],
)

py_library(
    name = "embedded_ssd_mobilenet_v1_feature_extractor",
    srcs = ["embedded_ssd_mobilenet_v1_feature_extractor.py"],
    deps = [
        ":feature_map_generators",
        ":ssd_mobilenet_v1_feature_extractor",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:ops",
        "//third_party/tensorflow_models/slim:mobilenet_v1",
    ],
)

py_library(
    name = "ssd_resnet_v1_fpn_feature_extractor",
    srcs = ["ssd_resnet_v1_fpn_feature_extractor.py"],
    deps = [
        ":feature_map_generators",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:ssd_meta_arch",
        "//tensorflow/models/research/object_detection/utils:ops",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
        "//third_party/tensorflow_models/slim:resnet_v1",
    ],
)

py_library(
    name = "ssd_resnet_v1_fpn_feature_extractor_testbase",
    srcs = ["ssd_resnet_v1_fpn_feature_extractor_testbase.py"],
    deps = [
        "//tensorflow/models/research/object_detection/models:ssd_feature_extractor_test",
    ],
)

py_test(
    name = "ssd_resnet_v1_fpn_feature_extractor_test",
    timeout = "long",
    srcs = ["ssd_resnet_v1_fpn_feature_extractor_test.py"],
    deps = [
        ":ssd_resnet_v1_fpn_feature_extractor",
        ":ssd_resnet_v1_fpn_feature_extractor_testbase",
        "//tensorflow",
    ],
)

py_test(
    name = "ssd_inception_v2_feature_extractor_test",
    srcs = [
        "ssd_inception_v2_feature_extractor_test.py",
    ],
    deps = [
        ":ssd_feature_extractor_test",
        ":ssd_inception_v2_feature_extractor",
        "//tensorflow",
    ],
)

py_test(
    name = "ssd_inception_v3_feature_extractor_test",
    srcs = [
        "ssd_inception_v3_feature_extractor_test.py",
    ],
    deps = [
        ":ssd_feature_extractor_test",
        ":ssd_inception_v3_feature_extractor",
        "//tensorflow",
    ],
)

py_test(
    name = "ssd_mobilenet_v1_feature_extractor_test",
    srcs = ["ssd_mobilenet_v1_feature_extractor_test.py"],
    deps = [
        ":ssd_feature_extractor_test",
        ":ssd_mobilenet_v1_feature_extractor",
        "//tensorflow",
    ],
)

py_test(
    name = "embedded_ssd_mobilenet_v1_feature_extractor_test",
    srcs = ["embedded_ssd_mobilenet_v1_feature_extractor_test.py"],
    deps = [
        ":embedded_ssd_mobilenet_v1_feature_extractor",
        ":ssd_feature_extractor_test",
        "//tensorflow",
    ],
)

py_test(
    name = "faster_rcnn_nas_feature_extractor_test",
    srcs = [
        "faster_rcnn_nas_feature_extractor_test.py",
    ],
    deps = [
        ":faster_rcnn_nas_feature_extractor",
        "//tensorflow",
    ],
)

py_library(
    name = "faster_rcnn_nas_feature_extractor",
    srcs = [
        "faster_rcnn_nas_feature_extractor.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//third_party/tensorflow_models/slim:nasnet",
    ],
)

py_library(
    name = "faster_rcnn_inception_resnet_v2_feature_extractor",
    srcs = [
        "faster_rcnn_inception_resnet_v2_feature_extractor.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//third_party/tensorflow_models/slim:inception_resnet_v2",
    ],
)

py_test(
    name = "faster_rcnn_inception_resnet_v2_feature_extractor_test",
    srcs = [
        "faster_rcnn_inception_resnet_v2_feature_extractor_test.py",
    ],
    deps = [
        ":faster_rcnn_inception_resnet_v2_feature_extractor",
        "//tensorflow",
    ],
)

py_library(
    name = "faster_rcnn_inception_v2_feature_extractor",
    srcs = [
        "faster_rcnn_inception_v2_feature_extractor.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//third_party/tensorflow_models/slim:inception_v2",
    ],
)

py_test(
    name = "faster_rcnn_inception_v2_feature_extractor_test",
    srcs = [
        "faster_rcnn_inception_v2_feature_extractor_test.py",
    ],
    deps = [
        ":faster_rcnn_inception_v2_feature_extractor",
        "//tensorflow",
    ],
)

py_library(
    name = "faster_rcnn_resnet_v1_feature_extractor",
    srcs = [
        "faster_rcnn_resnet_v1_feature_extractor.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/meta_architectures:faster_rcnn_meta_arch",
        "//third_party/tensorflow_models/slim:resnet_utils",
        "//third_party/tensorflow_models/slim:resnet_v1",
    ],
)

py_test(
    name = "faster_rcnn_resnet_v1_feature_extractor_test",
    srcs = [
        "faster_rcnn_resnet_v1_feature_extractor_test.py",
    ],
    deps = [
        ":faster_rcnn_resnet_v1_feature_extractor",
        "//tensorflow",
    ],
)
@@ -108,7 +108,7 @@ def _build_nasnet_base(hidden_previous,
   return net

-# TODO: Only fixed_shape_resizer is currently supported for NASNet
+# TODO(shlens): Only fixed_shape_resizer is currently supported for NASNet
 # featurization. The reason for this is that nasnet.py only supports
 # inputs with fully known shapes. We need to update nasnet.py to handle
 # shapes not known at compile time.
@@ -182,10 +182,14 @@ class FasterRCNNNASFeatureExtractor(
     with slim.arg_scope(nasnet_large_arg_scope_for_detection(
         is_batch_norm_training=self._train_batch_norm)):
-      _, end_points = nasnet.build_nasnet_large(
-          preprocessed_inputs, num_classes=None,
-          is_training=self._is_training,
-          final_endpoint='Cell_11')
+      with arg_scope([slim.conv2d,
+                      slim.batch_norm,
+                      slim.separable_conv2d],
+                     reuse=self._reuse_weights):
+        _, end_points = nasnet.build_nasnet_large(
+            preprocessed_inputs, num_classes=None,
+            is_training=self._is_training,
+            final_endpoint='Cell_11')

     # Note that both 'Cell_10' and 'Cell_11' have equal depth = 2016.
     rpn_feature_map = tf.concat([end_points['Cell_10'],
......
@@ -111,7 +111,7 @@ class FasterRCNNResnetV1FeatureExtractor(
     with tf.control_dependencies([shape_assert]):
       # Disables batchnorm for fine-tuning with smaller batch sizes.
-      # TODO: Figure out if it is needed when image
+      # TODO(chensun): Figure out if it is needed when image
       # batch size is bigger.
       with slim.arg_scope(
           resnet_utils.resnet_arg_scope(
......
@@ -40,7 +40,7 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
 }

-# TODO: add tests with different anchor strides.
+# TODO(rathodv): add tests with different anchor strides.
 class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):

   def test_get_expected_feature_map_shapes_with_inception_v2(self):
......
@@ -27,13 +27,17 @@ from object_detection.utils import test_case
 class SsdFeatureExtractorTestBase(test_case.TestCase):

   @abstractmethod
-  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple):
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                use_explicit_padding=False):
     """Constructs a new feature extractor.

     Args:
       depth_multiplier: float depth multiplier for feature extractor
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
+      use_explicit_padding: use 'VALID' padding for convolutions, but prepad
+        inputs so that the output dimensions are the same as if 'SAME' padding
+        were used.

     Returns:
       an ssd_meta_arch.SSDFeatureExtractor object.
     """
@@ -41,10 +45,11 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
   def check_extract_features_returns_correct_shape(
       self, batch_size, image_height, image_width, depth_multiplier,
-      pad_to_multiple, expected_feature_map_shapes):
+      pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False):

     def graph_fn(image_tensor):
       feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                         pad_to_multiple)
+                                                         pad_to_multiple,
+                                                         use_explicit_padding)
       feature_maps = feature_extractor.extract_features(image_tensor)
       return feature_maps
@@ -57,10 +62,11 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
   def check_extract_features_returns_correct_shapes_with_dynamic_inputs(
       self, batch_size, image_height, image_width, depth_multiplier,
-      pad_to_multiple, expected_feature_map_shapes):
+      pad_to_multiple, expected_feature_map_shapes, use_explicit_padding=False):

     def graph_fn(image_height, image_width):
       feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                         pad_to_multiple)
+                                                         pad_to_multiple,
+                                                         use_explicit_padding)
       image_tensor = tf.random_uniform([batch_size, image_height, image_width,
                                         3], dtype=tf.float32)
       feature_maps = feature_extractor.extract_features(image_tensor)
......
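The use_explicit_padding docstrings above (and in the MobileNet extractor below) describe prepadding inputs so that a 'VALID' convolution reproduces 'SAME' output sizes. Below is a standalone numeric sketch of that equivalence, using TensorFlow's SAME-padding output-size formula; the helper names are ours, not from this diff.

import math

def same_output_size(size, stride):
  """Output length of a SAME-padded convolution: ceil(size / stride)."""
  return int(math.ceil(size / float(stride)))

def explicit_pad_then_valid(size, kernel, stride):
  """Output length when the input is prepadded and convolved with VALID."""
  out = same_output_size(size, stride)
  total_pad = max((out - 1) * stride + kernel - size, 0)
  return (size + total_pad - kernel) // stride + 1

# The two strategies agree for a range of sizes, kernels and strides.
for size, kernel, stride in [(300, 3, 2), (129, 3, 1), (65, 5, 2)]:
  assert explicit_pad_then_valid(size, kernel, stride) == same_output_size(
      size, stride)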
@@ -53,8 +53,9 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         (e.g. 1), it is desirable to disable batch norm update and use
         pretrained batch norm params.
       reuse_weights: Whether to reuse variables. Default is None.
-      use_explicit_padding: Whether to use explicit padding when extracting
-        features. Default is False.
+      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+        inputs so that the output dimensions are the same as if 'SAME' padding
+        were used.
       use_depthwise: Whether to use depthwise convolutions. Default is False.
     """
     super(SSDMobileNetV1FeatureExtractor, self).__init__(
@@ -100,7 +101,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     }

     with slim.arg_scope(self._conv_hyperparams):
-      # TODO: Enable fused batch norm once quantization supports it.
+      # TODO(skligys): Enable fused batch norm once quantization supports it.
       with slim.arg_scope([slim.batch_norm], fused=False):
         with tf.variable_scope('MobilenetV1',
                                reuse=self._reuse_weights) as scope:
@@ -109,6 +110,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
               final_endpoint='Conv2d_13_pointwise',
               min_depth=self._min_depth,
               depth_multiplier=self._depth_multiplier,
+              use_explicit_padding=self._use_explicit_padding,
               scope=scope)
           feature_maps = feature_map_generators.multi_resolution_feature_maps(
               feature_map_layout=feature_map_layout,
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1. """SSD Feature Pyramid Network (FPN) feature extractors based on Resnet v1.
See https://arxiv.org/abs/1708.02002 for details. See https://arxiv.org/abs/1708.02002 for details.
...@@ -87,7 +101,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -87,7 +101,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
return resized_inputs - [[channel_means]] return resized_inputs - [[channel_means]]
def _filter_features(self, image_features): def _filter_features(self, image_features):
# TODO: Change resnet endpoint to strip scope prefixes instead # TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
# of munging the scope here. # of munging the scope here.
filtered_image_features = dict({}) filtered_image_features = dict({})
for key, feature in image_features.items(): for key, feature in image_features.items():
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd resnet v1 FPN feature extractors.""" """Tests for ssd resnet v1 FPN feature extractors."""
import abc import abc
import numpy as np import numpy as np
......
@@ -65,6 +65,7 @@ message Initializer {
   oneof initializer_oneof {
     TruncatedNormalInitializer truncated_normal_initializer = 1;
     VarianceScalingInitializer variance_scaling_initializer = 2;
+    RandomNormalInitializer random_normal_initializer = 3;
   }
 }
@@ -89,6 +90,13 @@ message VarianceScalingInitializer {
   optional Mode mode = 3 [default = FAN_IN];
 }

+// Configuration proto for random normal initializer. See
+// https://www.tensorflow.org/api_docs/python/tf/random_normal_initializer
+message RandomNormalInitializer {
+  optional float mean = 1 [default = 0.0];
+  optional float stddev = 2 [default = 1.0];
+}
+
 // Configuration proto for batch norm to apply after convolution op. See
 // https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
 message BatchNorm {
......
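For reference, a hyperparams fragment selecting the new initializer in the usual text-format pipeline config might look like the following. This is a sketch based only on the message definition above; the stddev value is illustrative.

initializer {
  random_normal_initializer {
    mean: 0.0
    stddev: 0.01
  }
}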