Unverified Commit 97760186 authored by Jonathan Huang, committed by GitHub

Merge pull request #4460 from pkulzc/master

Release evaluation code for OI Challenge 2018 and minor fixes. 
parents ed901b73 a703fc0c
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Utilities for creating TFRecords of TF examples for the Open Images dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""Opens all TFRecord shards for writing and adds them to an exit stack.
Args:
exit_stack: A contextlib2.ExitStack used to automatically close the TFRecords
opened in this function.
base_path: The base path for all shards.
num_shards: The number of shards.
Returns:
The list of opened TFRecords. Position k in the list corresponds to shard k.
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in range(num_shards)
]
tfrecords = [
exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
for file_name in tf_record_output_filenames
]
return tfrecords
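For orientation, a small usage sketch of the helper above (the output path and record payloads are made up; a real pipeline would write serialized `tf.train.Example` protos):

```python
import contextlib2

from object_detection.dataset_tools import tf_record_creation_util

# Hypothetical sketch: shard the output 10 ways and round-robin records across
# shards. The ExitStack closes every TFRecordWriter when the block exits.
with contextlib2.ExitStack() as tf_record_close_stack:
  output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
      tf_record_close_stack, '/tmp/example_dataset.tfrecord', num_shards=10)
  for index, serialized_example in enumerate([b'rec_a', b'rec_b', b'rec_c']):
    output_tfrecords[index % 10].write(serialized_example)
```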
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_record_creation_util.py."""
import os
import contextlib2
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util
class OpenOutputTfrecordsTests(tf.test.TestCase):
def test_sharded_tfrecord_writes(self):
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack,
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
for idx in range(10):
output_tfrecords[idx].write('test_{}'.format(idx))
for idx in range(10):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
if __name__ == '__main__':
tf.test.main()
@@ -39,12 +39,14 @@ EVAL_METRICS_CLASS_DICT = {
         object_detection_evaluation.PascalInstanceSegmentationEvaluator,
     'weighted_pascal_voc_instance_segmentation_metrics':
         object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
-    'open_images_detection_metrics':
+    'open_images_V2_detection_metrics':
         object_detection_evaluation.OpenImagesDetectionEvaluator,
     'coco_detection_metrics':
         coco_evaluation.CocoDetectionEvaluator,
     'coco_mask_metrics':
         coco_evaluation.CocoMaskEvaluator,
+    'oid_challenge_object_detection_metrics':
+        object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
 }

 EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics'
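As a rough sketch of how this registry is consumed (the evaluator constructor arguments are an assumption here and are not part of this hunk): the `metrics_set` string from `EvalConfig` selects a class from the dictionary, which is then instantiated with the dataset's category list.

```python
# Sketch only; assumes the evaluator classes accept a list of category dicts,
# which is not shown in this diff.
categories = [{'id': 1, 'name': '/m/01g317'}]  # hypothetical single category
evaluator_cls = EVAL_METRICS_CLASS_DICT['oid_challenge_object_detection_metrics']
evaluator = evaluator_cls(categories)
```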
@@ -4,12 +4,14 @@ The Tensorflow Object Detection API currently supports three evaluation protocol
 that can be configured in `EvalConfig` by setting `metrics_set` to the
 corresponding value.

-## PASCAL VOC 2007 detection metric
+## PASCAL VOC 2010 detection metric

 `EvalConfig.metrics_set='pascal_voc_detection_metrics'`

-The commonly used mAP metric for evaluating the quality of object detectors, computed according to the protocol of the PASCAL VOC Challenge 2007.
-The protocol is available [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/devkit_doc_07-Jun-2007.pdf).
+The commonly used mAP metric for evaluating the quality of object detectors,
+computed according to the protocol of the PASCAL VOC Challenge 2010-2012. The
+protocol is available
+[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/devkit_doc_08-May-2010.pdf).

 ## Weighted PASCAL VOC detection metric
@@ -20,33 +22,36 @@ precision when treating all classes as a single class. In comparison,
 PASCAL metrics computes the mean average precision as the mean of the
 per-class average precisions.

-For example, the test set consists of two classes, "cat" and "dog", and there are ten times more boxes of "cat" than those of "dog".
-According to PASCAL VOC 2007 metric, performance on each of the two classes would contribute equally towards the final mAP value,
-while for the Weighted PASCAL VOC metric the final mAP value will be influenced by frequency of each class.
+For example, the test set consists of two classes, "cat" and "dog", and there
+are ten times more boxes of "cat" than those of "dog". According to PASCAL VOC
+2010 metric, performance on each of the two classes would contribute equally
+towards the final mAP value, while for the Weighted PASCAL VOC metric the final
+mAP value will be influenced by frequency of each class.

-## PASCAL VOC 2007 instance segmentation metric
+## PASCAL VOC 2010 instance segmentation metric

 `EvalConfig.metrics_set='pascal_voc_instance_segmentation_metrics'`

-Similar to pascal voc 2007 detection metric, but computes the intersection over
+Similar to Pascal VOC 2010 detection metric, but computes the intersection over
 union based on the object masks instead of object boxes.

 ## Weighted PASCAL VOC instance segmentation metric

 `EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`

-Similar to the weighted pascal voc 2007 detection metric, but computes the
+Similar to the weighted pascal voc 2010 detection metric, but computes the
 intersection over union based on the object masks instead of object boxes.

-## Open Images detection metric {#open-images}
+## Open Images V2 detection metric

-`EvalConfig.metrics_set='open_images_metrics'`
+`EvalConfig.metrics_set='open_images_V2_detection_metrics'`

-This metric is defined originally for evaluating detector performance on [Open Images V2 dataset](https://github.com/openimages/dataset)
-and is fairly similar to the PASCAL VOC 2007 metric mentioned above.
-It computes interpolated average precision (AP) for each class and averages it among all classes (mAP).
+This metric is defined originally for evaluating detector performance on [Open
+Images V2 dataset](https://github.com/openimages/dataset) and is fairly similar
+to the PASCAL VOC 2010 metric mentioned above. It computes interpolated average
+precision (AP) for each class and averages it among all classes (mAP).

-The difference to the PASCAL VOC 2007 metric is the following: Open Images
+The difference to the PASCAL VOC 2010 metric is the following: Open Images
 annotations contain `group-of` ground-truth boxes (see [Open Images data
 description](https://github.com/openimages/dataset#annotations-human-bboxcsv)),
 that are treated differently for the purpose of deciding whether detections are
@@ -61,7 +66,7 @@ such that:
   box is greater than the IoU threshold (default value 0.5). \
   Illustration of handling non-group-of boxes: \
   ![alt
-  groupof_case_eval](img/nongroupof_case_eval.png "illustration of handling non-group-of boxes: yellow box - ground truth bounding box; green box - true positive; red box - false positives."){width="500" height="270"}
+  groupof_case_eval](img/nongroupof_case_eval.png "illustration of handling non-group-of boxes: yellow box - ground truth bounding box; green box - true positive; red box - false positives.")

   * yellow box - ground-truth box;
   * green box - true positive;
@@ -80,7 +85,7 @@ ground-truth box such that:
   ground-truth box. \
   Illustration of handling `group-of` boxes: \
   ![alt
-  groupof_case_eval](img/groupof_case_eval.png "illustration of handling group-of boxes: yellow box - ground truth bounding box; grey boxes - two detections of cars, that are ignored; red box - false positive."){width="500" height="270"}
+  groupof_case_eval](img/groupof_case_eval.png "illustration of handling group-of boxes: yellow box - ground truth bounding box; grey boxes - two detections of cars, that are ignored; red box - false positive.")

   * yellow box - ground-truth box;
   * grey boxes - two detections on cars, that are ignored;
@@ -105,3 +110,20 @@ other kind of car is annotated as "car" (for example, a sedan). Given this
 convention, the evaluation software treats all classes independently, ignoring
 the hierarchy. To achieve high performance values, object detectors should
 output bounding-boxes labelled in the same manner.
+
+## OID Challenge Object Detection Metric 2018
+
+`EvalConfig.metrics_set='oid_challenge_object_detection_metrics'`
+
+The metric for the OID Challenge Object Detection Metric 2018, Object Detection
+track. The description is provided on the [Open Images Challenge
+website](https://storage.googleapis.com/openimages/web/challenge.html).
+
+## OID Challenge Visual Relationship Detection Metric 2018
+
+The metric for the OID Challenge Visual Relationship Detection Metric 2018, Visual
+Relationship Detection track. The description is provided on the [Open Images
+Challenge
+website](https://storage.googleapis.com/openimages/web/challenge.html). Note:
+this is currently a stand-alone metric, that can be used only through the
+`metrics/oid_vrd_challenge_evaluation.py` util.
@@ -93,7 +93,7 @@ mkdir ${SPLIT}_tfrecords
 PYTHONPATH=$PYTHONPATH:$(readlink -f ..) \
 python -m object_detection/dataset_tools/create_oid_tf_record \
-  --input_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \
+  --input_box_annotations_csv 2017_07/$SPLIT/annotations-human-bbox.csv \
   --input_images_directory raw_images_${SPLIT} \
   --input_label_map ../object_detection/data/oid_bbox_trainable_label_map.pbtxt \
   --output_tf_record_path_prefix ${SPLIT}_tfrecords/$SPLIT.tfrecord \
@@ -214,7 +214,7 @@ tf_record_input_reader: { input_path: '${SPLIT}_detections.tfrecord@${NUM_SHARDS
 " > ${SPLIT}_eval_metrics/${SPLIT}_input_config.pbtxt

 echo "
-metrics_set: 'open_images_metrics'
+metrics_set: 'open_images_V2_detection_metrics'
 " > ${SPLIT}_eval_metrics/${SPLIT}_eval_config.pbtxt
 ```
@@ -56,12 +56,15 @@ def transform_input_data(tensor_dict,
   """A single function that is responsible for all input data transformations.

   Data transformation functions are applied in the following order.
-  1. data_augmentation_fn (optional): applied on tensor_dict.
-  2. model_preprocess_fn: applied only on image tensor in tensor_dict.
-  3. image_resizer_fn: applied on original image and instance mask tensor in
+  1. If key fields.InputDataFields.image_additional_channels is present in
+     tensor_dict, the additional channels will be merged into
+     fields.InputDataFields.image.
+  2. data_augmentation_fn (optional): applied on tensor_dict.
+  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
+  4. image_resizer_fn: applied on original image and instance mask tensor in
      tensor_dict.
-  4. one_hot_encoding: applied to classes tensor in tensor_dict.
-  5. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
+  5. one_hot_encoding: applied to classes tensor in tensor_dict.
+  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
      same they can be merged into a single box with an associated k-hot class
      label.
@@ -88,6 +91,11 @@ def transform_input_data(tensor_dict,
     A dictionary keyed by fields.InputDataFields containing the tensors obtained
     after applying all the transformations.
   """
+  if fields.InputDataFields.image_additional_channels in tensor_dict:
+    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
+    tensor_dict[fields.InputDataFields.image] = tf.concat(
+        [tensor_dict[fields.InputDataFields.image], channels], axis=2)
+
   if retain_original_image:
     tensor_dict[fields.InputDataFields.original_image] = tf.cast(
         tensor_dict[fields.InputDataFields.image], tf.uint8)
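A self-contained sketch (toy shapes, not the library function) of what the new step 1 above does: any additional channels are appended to the image along the depth axis before further preprocessing.

```python
import tensorflow as tf

image = tf.ones([4, 4, 3], dtype=tf.float32)    # e.g. an RGB image
extra = tf.zeros([4, 4, 2], dtype=tf.float32)   # two hypothetical extra channels
merged = tf.concat([image, extra], axis=2)      # resulting shape: [4, 4, 5]

with tf.Session() as sess:
  print(sess.run(tf.shape(merged)))  # [4 4 5]
```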
@@ -398,6 +398,33 @@ def _fake_image_resizer_fn(image, mask):
 class DataTransformationFnTest(tf.test.TestCase):

+  def test_combine_additional_channels_if_present(self):
+    image = np.random.rand(4, 4, 3).astype(np.float32)
+    additional_channels = np.random.rand(4, 4, 2).astype(np.float32)
+    tensor_dict = {
+        fields.InputDataFields.image:
+            tf.constant(image),
+        fields.InputDataFields.image_additional_channels:
+            tf.constant(additional_channels),
+        fields.InputDataFields.groundtruth_classes:
+            tf.constant(np.array([1, 1], np.int32))
+    }
+
+    input_transformation_fn = functools.partial(
+        inputs.transform_input_data,
+        model_preprocess_fn=_fake_model_preprocessor_fn,
+        image_resizer_fn=_fake_image_resizer_fn,
+        num_classes=1)
+    with self.test_session() as sess:
+      transformed_inputs = sess.run(
+          input_transformation_fn(tensor_dict=tensor_dict))
+    self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].dtype,
+                        tf.float32)
+    self.assertAllEqual(transformed_inputs[fields.InputDataFields.image].shape,
+                        [4, 4, 5])
+    self.assertAllClose(transformed_inputs[fields.InputDataFields.image],
+                        np.concatenate((image, additional_channels), axis=2))
+
   def test_returns_correct_class_label_encodings(self):
     tensor_dict = {
         fields.InputDataFields.image:
@@ -1199,7 +1199,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     if self._is_training:
       proposal_boxes = tf.stop_gradient(proposal_boxes)
       if not self._hard_example_miner:
-        (groundtruth_boxlists, groundtruth_classes_with_background_list,
+        (groundtruth_boxlists, groundtruth_classes_with_background_list, _,
          _) = self._format_groundtruth_data(true_image_shapes)
         (proposal_boxes, proposal_scores,
          num_proposals) = self._unpad_proposals_and_sample_box_classifier_batch(
@@ -1358,9 +1358,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
         resized_masks_list.append(resized_mask)

       groundtruth_masks_list = resized_masks_list
+    groundtruth_weights_list = None
+    if self.groundtruth_has_field(fields.BoxListFields.weights):
+      groundtruth_weights_list = self.groundtruth_lists(
+          fields.BoxListFields.weights)

     return (groundtruth_boxlists, groundtruth_classes_with_background_list,
-            groundtruth_masks_list)
+            groundtruth_masks_list, groundtruth_weights_list)

   def _sample_box_classifier_minibatch(self,
                                        proposal_boxlist,
@@ -1586,14 +1590,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
     """
     with tf.name_scope(scope, 'Loss', prediction_dict.values()):
       (groundtruth_boxlists, groundtruth_classes_with_background_list,
-       groundtruth_masks_list) = self._format_groundtruth_data(
-           true_image_shapes)
+       groundtruth_masks_list, groundtruth_weights_list
+      ) = self._format_groundtruth_data(true_image_shapes)
       loss_dict = self._loss_rpn(
           prediction_dict['rpn_box_encodings'],
           prediction_dict['rpn_objectness_predictions_with_background'],
-          prediction_dict['anchors'],
-          groundtruth_boxlists,
-          groundtruth_classes_with_background_list)
+          prediction_dict['anchors'], groundtruth_boxlists,
+          groundtruth_classes_with_background_list, groundtruth_weights_list)
       if self._number_of_stages > 1:
         loss_dict.update(
             self._loss_box_classifier(
@@ -1603,18 +1606,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
                 prediction_dict['num_proposals'],
                 groundtruth_boxlists,
                 groundtruth_classes_with_background_list,
+                groundtruth_weights_list,
                 prediction_dict['image_shape'],
                 prediction_dict.get('mask_predictions'),
                 groundtruth_masks_list,
             ))
     return loss_dict

-  def _loss_rpn(self,
-                rpn_box_encodings,
-                rpn_objectness_predictions_with_background,
-                anchors,
-                groundtruth_boxlists,
-                groundtruth_classes_with_background_list):
+  def _loss_rpn(self, rpn_box_encodings,
+                rpn_objectness_predictions_with_background, anchors,
+                groundtruth_boxlists, groundtruth_classes_with_background_list,
+                groundtruth_weights_list):
     """Computes scalar RPN loss tensors.

     Uses self._proposal_target_assigner to obtain regression and classification
@@ -1637,6 +1639,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
       groundtruth_classes_with_background_list: A list of 2-D one-hot
         (or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
         class targets with the 0th index assumed to map to the background class.
+      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
+        [num_boxes] containing weights for groundtruth boxes.

     Returns:
       a dictionary mapping loss keys (`first_stage_localization_loss`,
@@ -1647,7 +1651,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
     (batch_cls_targets, batch_cls_weights, batch_reg_targets,
      batch_reg_weights, _) = target_assigner.batch_assign_targets(
          self._proposal_target_assigner, box_list.BoxList(anchors),
-         groundtruth_boxlists, len(groundtruth_boxlists)*[None])
+         groundtruth_boxlists,
+         len(groundtruth_boxlists) * [None], groundtruth_weights_list)
     batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)

     def _minibatch_subsample_fn(inputs):
@@ -1695,6 +1700,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
                            num_proposals,
                            groundtruth_boxlists,
                            groundtruth_classes_with_background_list,
+                           groundtruth_weights_list,
                            image_shape,
                            prediction_masks=None,
                            groundtruth_masks_list=None):
@@ -1731,6 +1737,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
       groundtruth_classes_with_background_list: a list of 2-D one-hot
         (or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
         class targets with the 0th index assumed to map to the background class.
+      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
+        [num_boxes] containing weights for groundtruth boxes.
       image_shape: a 1-D tensor of shape [4] representing the image shape.
       prediction_masks: an optional 4-D tensor with shape [total_num_proposals,
         num_classes, mask_height, mask_width] containing the instance masks for
@@ -1765,7 +1773,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
     (batch_cls_targets_with_background, batch_cls_weights, batch_reg_targets,
      batch_reg_weights, _) = target_assigner.batch_assign_targets(
          self._detector_target_assigner, proposal_boxlists,
-         groundtruth_boxlists, groundtruth_classes_with_background_list)
+         groundtruth_boxlists, groundtruth_classes_with_background_list,
+         groundtruth_weights_list)

     class_predictions_with_background = tf.reshape(
         class_predictions_with_background,
@@ -1847,8 +1856,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
           unmatched_cls_target=tf.zeros(image_shape[1:3], dtype=tf.float32))
       (batch_mask_targets, _, _,
        batch_mask_target_weights, _) = target_assigner.batch_assign_targets(
-           mask_target_assigner, proposal_boxlists,
-           groundtruth_boxlists, groundtruth_masks_list)
+           mask_target_assigner, proposal_boxlists, groundtruth_boxlists,
+           groundtruth_masks_list, groundtruth_weights_list)

       # Pad the prediction_masks with to add zeros for background class to be
       # consistent with class predictions.
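To make the new `groundtruth_weights_list` plumbing concrete, here is a toy NumPy illustration (not the actual loss code) of how a per-box weight of zero drops that box's contribution from a weighted loss sum; the updated test below exercises this with a `[1, 0]` weight vector on the third image.

```python
import numpy as np

per_box_loss = np.array([0.7, 1.3], dtype=np.float32)  # hypothetical per-box losses
weights = np.array([1.0, 0.0], dtype=np.float32)       # second box is ignored
print(np.sum(per_box_loss * weights))                   # 0.7
```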
@@ -26,6 +26,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
+from object_detection.utils import test_utils

 slim = tf.contrib.slim
 BOX_CODE_SIZE = 4
@@ -650,8 +651,11 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
     with self.test_session() as sess:
       proposals_out = sess.run(proposals)
-      self.assertAllClose(proposals_out['detection_boxes'],
-                          expected_proposal_boxes)
+      for image_idx in range(batch_size):
+        self.assertTrue(
+            test_utils.first_rows_close_as_set(
+                proposals_out['detection_boxes'][image_idx].tolist(),
+                expected_proposal_boxes[image_idx]))
       self.assertAllClose(proposals_out['detection_scores'],
                           expected_proposal_scores)
       self.assertAllEqual(proposals_out['num_detections'],
@@ -810,7 +814,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
   def test_loss_full(self):
     model = self._build_model(
         is_training=True, number_of_stages=2, second_stage_batch_size=6)
-    batch_size = 2
+    batch_size = 3
     anchors = tf.constant(
         [[0, 0, 16, 16],
          [0, 16, 16, 32],
@@ -822,31 +826,25 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
          BOX_CODE_SIZE], dtype=tf.float32)
     # use different numbers for the objectness category to break ties in
     # order of boxes returned by NMS
-    rpn_objectness_predictions_with_background = tf.constant([
-        [[-10, 13],
-         [10, -10],
-         [10, -11],
-         [-10, 12]],
-        [[10, -10],
-         [-10, 13],
-         [-10, 12],
-         [10, -11]]], dtype=tf.float32)
+    rpn_objectness_predictions_with_background = tf.constant(
+        [[[-10, 13], [10, -10], [10, -11], [-10, 12]], [[10, -10], [-10, 13], [
+            -10, 12
+        ], [10, -11]], [[10, -10], [-10, 13], [-10, 12], [10, -11]]],
+        dtype=tf.float32)
     image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)

-    num_proposals = tf.constant([6, 6], dtype=tf.int32)
+    num_proposals = tf.constant([6, 6, 6], dtype=tf.int32)
     proposal_boxes = tf.constant(
-        2 * [[[0, 0, 16, 16],
-              [0, 16, 16, 32],
-              [16, 0, 32, 16],
-              [16, 16, 32, 32],
-              [0, 0, 16, 16],
-              [0, 16, 16, 32]]], dtype=tf.float32)
+        3 * [[[0, 0, 16, 16], [0, 16, 16, 32], [16, 0, 32, 16],
+              [16, 16, 32, 32], [0, 0, 16, 16], [0, 16, 16, 32]]],
+        dtype=tf.float32)
     refined_box_encodings = tf.zeros(
         (batch_size * model.max_num_proposals,
          model.num_classes,
         BOX_CODE_SIZE), dtype=tf.float32)
     class_predictions_with_background = tf.constant(
-        [[-10, 10, -10],  # first image
+        [
+            [-10, 10, -10],  # first image
             [10, -10, -10],
             [10, -10, -10],
             [-10, -10, 10],
@@ -857,7 +855,15 @@
             [-10, 10, -10],
             [10, -10, -10],
             [10, -10, -10],
-         [-10, 10, -10]], dtype=tf.float32)
+            [-10, 10, -10],
+            [10, -10, -10],  # third image
+            [-10, 10, -10],
+            [-10, 10, -10],
+            [10, -10, -10],
+            [10, -10, -10],
+            [-10, 10, -10]
+        ],
+        dtype=tf.float32)

     mask_predictions_logits = 20 * tf.ones((batch_size *
                                             model.max_num_proposals,
@@ -867,18 +873,29 @@
     groundtruth_boxes_list = [
         tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
-        tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
-    groundtruth_classes_list = [tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
-                                tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
+        tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32),
+        tf.constant([[0, .5, .5, 1], [.5, 0, 1, 1]], dtype=tf.float32)
+    ]
+    groundtruth_classes_list = [
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
+        tf.constant([[1, 0], [1, 0]], dtype=tf.float32),
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32)
+    ]

     # Set all elements of groundtruth mask to 1.0. In this case all proposal
     # crops of the groundtruth masks should return a mask that covers the entire
     # proposal. Thus, if mask_predictions_logits element values are all greater
     # than 20, the loss should be zero.
-    groundtruth_masks_list = [tf.convert_to_tensor(np.ones((2, 32, 32)),
-                                                   dtype=tf.float32),
-                              tf.convert_to_tensor(np.ones((2, 32, 32)),
-                                                   dtype=tf.float32)]
+    groundtruth_masks_list = [
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32),
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32),
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32)
+    ]
+    groundtruth_weights_list = [
+        tf.constant([1, 1], dtype=tf.float32),
+        tf.constant([1, 1], dtype=tf.float32),
+        tf.constant([1, 0], dtype=tf.float32)
+    ]
     prediction_dict = {
         'rpn_box_encodings': rpn_box_encodings,
         'rpn_objectness_predictions_with_background':
@@ -892,9 +909,11 @@
         'mask_predictions': mask_predictions_logits
     }
     _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-    model.provide_groundtruth(groundtruth_boxes_list,
-                              groundtruth_classes_list,
-                              groundtruth_masks_list)
+    model.provide_groundtruth(
+        groundtruth_boxes_list,
+        groundtruth_classes_list,
+        groundtruth_masks_list,
+        groundtruth_weights_list=groundtruth_weights_list)
     loss_dict = model.loss(prediction_dict, true_image_shapes)

     with self.test_session() as sess:
@@ -402,8 +402,9 @@ class SSDMetaArch(model.DetectionModel):
               im_width=image_shape[2]))
     prediction_dict = self._box_predictor.predict(
         feature_maps, self._anchor_generator.num_anchors_per_location())
-    box_encodings = tf.squeeze(
-        tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
+    box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
+    if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
+      box_encodings = tf.squeeze(box_encodings, axis=2)
     class_predictions_with_background = tf.concat(
         prediction_dict['class_predictions_with_background'], axis=1)
     predictions_dict = {
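A standalone sketch (toy shapes only) of the new conditional squeeze above: a `[batch, num_anchors, 1, box_code_size]` encoding is reduced to `[batch, num_anchors, box_code_size]`, while an already 3-D encoding is left untouched.

```python
import tensorflow as tf

box_encodings = tf.zeros([2, 8, 1, 4])  # hypothetical predictor output
if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
  box_encodings = tf.squeeze(box_encodings, axis=2)

with tf.Session() as sess:
  print(sess.run(tf.shape(box_encodings)))  # [2 8 4]
```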
@@ -243,21 +243,24 @@ class SsdMetaArchTest(test_case.TestCase):
                     (batch_size, None, None, 3),
                     (None, None, None, 3)]

-    expected_boxes = np.array([[[0, 0, .5, .5],
+    expected_boxes = [
+        [
+            [0, 0, .5, .5],
             [0, .5, .5, 1],
             [.5, 0, 1, .5],
             [0, 0, 0, 0],  # pruned prediction
-            [0, 0, 0, 0]],  # padding
-            [[0, 0, .5, .5],
+            [0, 0, 0, 0]
+        ],  # padding
+        [
+            [0, 0, .5, .5],
             [0, .5, .5, 1],
             [.5, 0, 1, .5],
             [0, 0, 0, 0],  # pruned prediction
-            [0, 0, 0, 0]]  # padding
-        ])
-    expected_scores = np.array([[0, 0, 0, 0, 0],
-                                [0, 0, 0, 0, 0]])
-    expected_classes = np.array([[0, 0, 0, 0, 0],
-                                 [0, 0, 0, 0, 0]])
+            [0, 0, 0, 0]
+        ]
+    ]  # padding
+    expected_scores = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
+    expected_classes = [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
     expected_num_detections = np.array([3, 3])

     for input_shape in input_shapes:
@@ -282,7 +285,11 @@ class SsdMetaArchTest(test_case.TestCase):
                            input_placeholder:
                                np.random.uniform(
                                    size=(batch_size, 2, 2, 3))})
-      self.assertAllClose(detections_out['detection_boxes'], expected_boxes)
+      for image_idx in range(batch_size):
+        self.assertTrue(
+            test_utils.first_rows_close_as_set(
+                detections_out['detection_boxes'][image_idx].tolist(),
+                expected_boxes[image_idx]))
       self.assertAllClose(detections_out['detection_scores'], expected_scores)
       self.assertAllClose(detections_out['detection_classes'], expected_classes)
       self.assertAllClose(detections_out['num_detections'],
@@ -429,7 +436,7 @@ class SsdMetaArchTest(test_case.TestCase):
   def test_restore_map_for_detection_ckpt(self):
     model, _, _, _ = self._create_model()
-    model.predict(tf.constant(np.array([[[0, 0], [1, 1]], [[1, 0], [0, 1]]],
+    model.predict(tf.constant(np.array([[[[0, 0], [1, 1]], [[1, 0], [0, 1]]]],
                                        dtype=np.float32)),
                   true_image_shapes=None)
     init_op = tf.global_variables_initializer()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Runs evaluation using OpenImages groundtruth and predictions.
Example usage:
python third_party/tensorflow_models/object_detection/\
metrics/oid_vrd_challenge_evaluation.py \
--input_annotations_boxes=/path/to/input/annotations-human-bbox.csv \
--input_annotations_labels=/path/to/input/annotations-label.csv \
--input_class_labelmap=/path/to/input/class_labelmap.pbtxt \
--input_relationship_labelmap=/path/to/input/relationship_labelmap.pbtxt \
--input_predictions=/path/to/input/predictions.csv \
--output_metrics=/path/to/output/metric.csv \
CSVs with bounding box annotations and image labels (including the image URLs)
can be downloaded from the Open Images Challenge website:
https://storage.googleapis.com/openimages/web/challenge.html
The format of the input CSVs and the metrics themselves are described on the
challenge website.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import pandas as pd
from google.protobuf import text_format
from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils
from object_detection.protos import string_int_label_map_pb2
from object_detection.utils import vrd_evaluation
def _load_labelmap(labelmap_path):
"""Loads labelmap from the labelmap path.
Args:
labelmap_path: Path to the labelmap.
Returns:
A dictionary mapping class name to class numerical id.
"""
label_map = string_int_label_map_pb2.StringIntLabelMap()
with open(labelmap_path, 'r') as fid:
label_map_string = fid.read()
text_format.Merge(label_map_string, label_map)
labelmap_dict = {}
for item in label_map.item:
labelmap_dict[item.name] = item.id
return labelmap_dict
def _swap_labelmap_dict(labelmap_dict):
"""Swaps keys and values in the labelmap.
Args:
labelmap_dict: Input dictionary mapping class name to class numerical id.
Returns:
A dictionary mapping class numerical id to class name.
"""
return dict((v, k) for k, v in labelmap_dict.items())
def main(parsed_args):
all_box_annotations = pd.read_csv(parsed_args.input_annotations_boxes)
all_label_annotations = pd.read_csv(parsed_args.input_annotations_labels)
all_annotations = pd.concat([all_box_annotations, all_label_annotations])
class_label_map = _load_labelmap(parsed_args.input_class_labelmap)
relationship_label_map = _load_labelmap(
parsed_args.input_relationship_labelmap)
relation_evaluator = vrd_evaluation.VRDRelationDetectionEvaluator()
phrase_evaluator = vrd_evaluation.VRDPhraseDetectionEvaluator()
for _, groundtruth in enumerate(all_annotations.groupby('ImageID')):
image_id, image_groundtruth = groundtruth
groundtruth_dictionary = utils.build_groundtruth_vrd_dictionary(
image_groundtruth, class_label_map, relationship_label_map)
relation_evaluator.add_single_ground_truth_image_info(
image_id, groundtruth_dictionary)
phrase_evaluator.add_single_ground_truth_image_info(image_id,
groundtruth_dictionary)
all_predictions = pd.read_csv(parsed_args.input_predictions)
for _, prediction_data in enumerate(all_predictions.groupby('ImageID')):
image_id, image_predictions = prediction_data
prediction_dictionary = utils.build_predictions_vrd_dictionary(
image_predictions, class_label_map, relationship_label_map)
relation_evaluator.add_single_detected_image_info(image_id,
prediction_dictionary)
phrase_evaluator.add_single_detected_image_info(image_id,
prediction_dictionary)
relation_metrics = relation_evaluator.evaluate()
phrase_metrics = phrase_evaluator.evaluate()
with open(parsed_args.output_metrics, 'w') as fid:
utils.write_csv(fid, relation_metrics)
utils.write_csv(fid, phrase_metrics)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=
'Evaluate Open Images Visual Relationship Detection predictions.')
parser.add_argument(
'--input_annotations_boxes',
required=True,
help='File with groundtruth vrd annotations.')
parser.add_argument(
'--input_annotations_labels',
required=True,
help='File with groundtruth labels annotations')
parser.add_argument(
'--input_predictions',
required=True,
help="""File with detection predictions; NOTE: no postprocessing is
applied in the evaluation script.""")
parser.add_argument(
'--input_class_labelmap',
required=True,
help="""OpenImages Challenge labelmap; note: it is expected to include
attributes.""")
parser.add_argument(
'--input_relationship_labelmap',
required=True,
help="""OpenImages Challenge relationship labelmap.""")
parser.add_argument(
'--output_metrics', required=True, help='Output file with csv metrics')
args = parser.parse_args()
main(args)
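As a quick sanity check of `_load_labelmap` above, a toy `StringIntLabelMap` text proto (the entries here are made up) parses the same way the script expects:

```python
from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2

labelmap_text = """
item { name: "/m/01g317" id: 1 }
item { name: "/m/0k4j" id: 2 }
"""
label_map = string_int_label_map_pb2.StringIntLabelMap()
text_format.Merge(labelmap_text, label_map)
print({item.name: item.id for item in label_map.item})  # {'/m/01g317': 1, '/m/0k4j': 2}
```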
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Converts data from CSV format to the VRDDetectionEvaluator format."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import numpy as np
from object_detection.core import standard_fields
from object_detection.utils import vrd_evaluation
def build_groundtruth_vrd_dictionary(data, class_label_map,
relationship_label_map):
"""Builds a groundtruth dictionary from groundtruth data in CSV file.
Args:
data: Pandas DataFrame with the groundtruth data for a single image.
class_label_map: Class labelmap from string label name to an integer.
relationship_label_map: Relationship type labelmap from string name to an
integer.
Returns:
A dictionary with keys suitable for passing to
VRDDetectionEvaluator.add_single_ground_truth_image_info:
standard_fields.InputDataFields.groundtruth_boxes: A numpy array
of structures with the shape [M, 1], representing M tuples, each tuple
containing the same number of named bounding boxes.
Each box is of the format [y_min, x_min, y_max, x_max] (see
datatype vrd_box_data_type, single_box_data_type above).
standard_fields.InputDataFields.groundtruth_classes: A numpy array of
structures shape [M, 1], representing the class labels of the
corresponding bounding boxes and possibly additional classes (see
datatype label_data_type above).
standard_fields.InputDataFields.verified_labels: numpy array
of shape [K] containing verified labels.
"""
data_boxes = data[data.LabelName.isnull()]
data_labels = data[data.LabelName1.isnull()]
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(lambda x: class_label_map[x])
labels['object'] = data_boxes['LabelName2'].map(lambda x: class_label_map[x])
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x])
return {
standard_fields.InputDataFields.groundtruth_boxes:
boxes,
standard_fields.InputDataFields.groundtruth_classes:
labels,
standard_fields.InputDataFields.verified_labels:
data_labels['LabelName'].map(lambda x: class_label_map[x]),
}
def build_predictions_vrd_dictionary(data, class_label_map,
relationship_label_map):
"""Builds a predictions dictionary from predictions data in CSV file.
Args:
data: Pandas DataFrame with the predictions data for a single image.
class_label_map: Class labelmap from string label name to an integer.
relationship_label_map: Relationship type labelmap from string name to an
integer.
Returns:
Dictionary with keys suitable for passing to
VRDDetectionEvaluator.add_single_detected_image_info:
standard_fields.DetectionResultFields.detection_boxes: A numpy array of
structures with shape [N, 1], representing N tuples, each tuple
containing the same number of named bounding boxes.
Each box is of the format [y_min, x_min, y_max, x_max] (as an example
see datatype vrd_box_data_type, single_box_data_type above).
standard_fields.DetectionResultFields.detection_scores: float32 numpy
array of shape [N] containing detection scores for the boxes.
standard_fields.DetectionResultFields.detection_classes: A numpy array
of structures shape [N, 1], representing the class labels of the
corresponding bounding boxes and possibly additional classes (see
datatype label_data_type above).
"""
data_boxes = data
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(lambda x: class_label_map[x])
labels['object'] = data_boxes['LabelName2'].map(lambda x: class_label_map[x])
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x])
return {
standard_fields.DetectionResultFields.detection_boxes:
boxes,
standard_fields.DetectionResultFields.detection_classes:
labels,
standard_fields.DetectionResultFields.detection_scores:
data_boxes['Score'].as_matrix()
}
def write_csv(fid, metrics):
"""Writes metrics key-value pairs to CSV file.
Args:
fid: File identifier of an opened file.
metrics: A dictionary with metrics to be written.
"""
metrics_writer = csv.writer(fid, delimiter=',')
for metric_name, metric_value in metrics.items():
metrics_writer.writerow([metric_name, str(metric_value)])
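A minimal usage sketch of `build_predictions_vrd_dictionary` above, using a single made-up prediction row; the column names mirror the ones the function reads, and the label maps are toy stand-ins for the real challenge label maps.

```python
import pandas as pd

from object_detection.core import standard_fields
from object_detection.metrics import oid_vrd_challenge_evaluation_utils as utils

# One hypothetical relationship prediction for a single image.
predictions = pd.DataFrame({
    'ImageID': ['img1'],
    'LabelName1': ['/m/01g317'], 'LabelName2': ['/m/0k4j'],
    'RelationshipLabel': ['at'],
    'YMin1': [0.1], 'XMin1': [0.1], 'YMax1': [0.5], 'XMax1': [0.5],
    'YMin2': [0.3], 'XMin2': [0.3], 'YMax2': [0.9], 'XMax2': [0.9],
    'Score': [0.8],
})
class_label_map = {'/m/01g317': 1, '/m/0k4j': 2}  # toy label maps
relationship_label_map = {'at': 1}

prediction_dict = utils.build_predictions_vrd_dictionary(
    predictions, class_label_map, relationship_label_map)
print(prediction_dict[standard_fields.DetectionResultFields.detection_scores])
```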
@@ -113,7 +113,9 @@ class TfExampleDetectionAndGTParser(data_parser.DataToNumpyParser):
         fields.InputDataFields.groundtruth_difficult:
             Int64Parser(fields.TfExampleFields.object_difficult),
         fields.InputDataFields.groundtruth_group_of:
-            Int64Parser(fields.TfExampleFields.object_group_of)
+            Int64Parser(fields.TfExampleFields.object_group_of),
+        fields.InputDataFields.verified_labels:
+            Int64Parser(fields.TfExampleFields.image_class_label),
     }

   def parse(self, tf_example):
@@ -44,6 +44,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_class_label = [1, 1, 2]
     object_difficult = [1, 0, 0]
     object_group_of = [0, 0, 1]
+    verified_labels = [1, 2, 3, 4]
     detection_class_label = [2, 1]
     detection_score = [0.5, 0.3]
     features = {
@@ -113,10 +114,19 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(features=tf.train.Features(feature=features))
     results_dict = parser.parse(example)
     self.assertIsNotNone(results_dict)
-    np_testing.assert_almost_equal(
+    np_testing.assert_equal(
         object_group_of,
         results_dict[fields.InputDataFields.groundtruth_group_of])

+    features[fields.TfExampleFields.image_class_label] = (
+        self._Int64Feature(verified_labels))
+
+    example = tf.train.Example(features=tf.train.Features(feature=features))
+    results_dict = parser.parse(example)
+    self.assertIsNotNone(results_dict)
+    np_testing.assert_equal(
+        verified_labels, results_dict[fields.InputDataFields.verified_labels])
+
   def testParseString(self):
     string_val = 'abc'
     features = {'string': self._BytesFeature(string_val)}
@@ -147,8 +147,6 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with (slim.arg_scope(self._conv_hyperparams_fn())
           if self._override_base_feature_extractor_hyperparams
           else context_manager.IdentityContextManager()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       _, image_features = mobilenet_v1.mobilenet_v1_base(
           ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
           final_endpoint='Conv2d_13_pointwise',
@@ -157,8 +155,6 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       feature_maps = feature_map_generators.multi_resolution_feature_maps(
           feature_map_layout=feature_map_layout,
           depth_multiplier=self._depth_multiplier,
@@ -110,8 +110,6 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     with (slim.arg_scope(self._conv_hyperparams_fn())
           if self._override_base_feature_extractor_hyperparams
           else context_manager.IdentityContextManager()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       _, image_features = mobilenet_v1.mobilenet_v1_base(
           ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
           final_endpoint='Conv2d_13_pointwise',
@@ -120,8 +118,6 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      # TODO(skligys): Enable fused batch norm once quantization supports it.
-      with slim.arg_scope([slim.batch_norm], fused=False):
       feature_maps = feature_map_generators.multi_resolution_feature_maps(
           feature_map_layout=feature_map_layout,
           depth_multiplier=self._depth_multiplier,
@@ -148,7 +148,7 @@ class SsdMobilenetV1FeatureExtractorTest(
     self.check_feature_extractor_variables_under_scope(
         depth_multiplier, pad_to_multiple, scope_name)

-  def test_nofused_batchnorm(self):
+  def test_has_fused_batchnorm(self):
     image_height = 40
     image_width = 40
     depth_multiplier = 1
@@ -159,7 +159,7 @@ class SsdMobilenetV1FeatureExtractorTest(
                            pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(image_placeholder)
     _ = feature_extractor.extract_features(preprocessed_image)
-    self.assertFalse(any(op.type == 'FusedBatchNorm'
+    self.assertTrue(any(op.type == 'FusedBatchNorm'
                         for op in tf.get_default_graph().get_operations()))

 if __name__ == '__main__':