Unverified Commit fd7b6887 authored by Jonathan Huang, committed by GitHub

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
......@@ -24,7 +24,7 @@ import tensorflow as tf
from object_detection.dataset_tools import create_pascal_tf_record
class DictToTFExampleTest(tf.test.TestCase):
class CreatePascalTFRecordTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
......
......@@ -50,6 +50,8 @@ flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
'for pet faces. Otherwise generates bounding boxes (as '
'well as segmentations for full pet bodies). Note that '
'in the latter case, the resulting files are much larger.')
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
'segmentation masks. Options are "png" or "numerical".')
FLAGS = flags.FLAGS
......@@ -72,7 +74,8 @@ def dict_to_tf_example(data,
label_map_dict,
image_subdirectory,
ignore_difficult_instances=False,
faces_only=True):
faces_only=True,
mask_type='png'):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
......@@ -89,6 +92,8 @@ def dict_to_tf_example(data,
dataset (default: False).
faces_only: If True, generates bounding boxes for pet faces. Otherwise
generates bounding boxes (as well as segmentations for full pet bodies).
mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
smaller file sizes.
Returns:
example: The converted tf.Example.
......@@ -158,7 +163,7 @@ def dict_to_tf_example(data,
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
if not faces_only:
mask_remapped = mask_np != 2
mask_remapped = (mask_np != 2).astype(np.uint8)
masks.append(mask_remapped)
feature_dict = {
......@@ -182,10 +187,20 @@ def dict_to_tf_example(data,
'image/object/view': dataset_util.bytes_list_feature(poses),
}
if not faces_only:
if mask_type == 'numerical':
mask_stack = np.stack(masks).astype(np.float32)
masks_flattened = np.reshape(mask_stack, [-1])
feature_dict['image/object/mask'] = (
dataset_util.float_list_feature(masks_flattened.tolist()))
elif mask_type == 'png':
encoded_mask_png_list = []
for mask in masks:
img = PIL.Image.fromarray(mask)
output = io.BytesIO()
img.save(output, format='PNG')
encoded_mask_png_list.append(output.getvalue())
feature_dict['image/object/mask'] = (
dataset_util.bytes_list_feature(encoded_mask_png_list))
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return example
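As a side note on what the new `mask_type` option produces, below is a small sketch (not part of the diff) of reading the `image/object/mask` feature back out of a generated record. The feature key and both encodings mirror the code above; the record path and the `height`/`width` values are placeholders.

```python
# Sketch: decode 'image/object/mask' from a tf.Example written by the code
# above. 'example.record', height and width are placeholder values.
import io

import numpy as np
import PIL.Image
import tensorflow as tf


def decode_masks(tf_example, mask_type, height, width):
  feature = tf_example.features.feature['image/object/mask']
  if mask_type == 'png':
    # One single-channel PNG string per instance; pixel values are 0 or 1.
    return [np.array(PIL.Image.open(io.BytesIO(png)))
            for png in feature.bytes_list.value]
  elif mask_type == 'numerical':
    # A single float vector holding all instances, flattened row-major.
    flat = np.array(feature.float_list.value, dtype=np.float32)
    return list(flat.reshape([-1, height, width]))


for record in tf.python_io.tf_record_iterator('example.record'):
  example = tf.train.Example.FromString(record)
  masks = decode_masks(example, mask_type='png', height=300, width=300)
```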
......@@ -196,7 +211,8 @@ def create_tf_record(output_filename,
annotations_dir,
image_dir,
examples,
faces_only=True):
faces_only=True,
mask_type='png'):
"""Creates a TFRecord file from examples.
Args:
......@@ -207,6 +223,8 @@ def create_tf_record(output_filename,
examples: Examples to parse and save to tf record.
faces_only: If True, generates bounding boxes for pet faces. Otherwise
generates bounding boxes (as well as segmentations for full pet bodies).
mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
smaller file sizes.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
for idx, example in enumerate(examples):
......@@ -225,7 +243,12 @@ def create_tf_record(output_filename,
try:
tf_example = dict_to_tf_example(
data, mask_path, label_map_dict, image_dir, faces_only=faces_only)
data,
mask_path,
label_map_dict,
image_dir,
faces_only=faces_only,
mask_type=mask_type)
writer.write(tf_example.SerializeToString())
except ValueError:
logging.warning('Invalid example: %s, ignoring.', xml_path)
......@@ -233,7 +256,7 @@ def create_tf_record(output_filename,
writer.close()
# TODO(derekjchow): Add test for pet/PASCAL main files.
# TODO: Add test for pet/PASCAL main files.
def main(_):
data_dir = FLAGS.data_dir
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
......@@ -262,10 +285,22 @@ def main(_):
'pet_train_with_masks.record')
val_output_path = os.path.join(FLAGS.output_dir,
'pet_val_with_masks.record')
create_tf_record(train_output_path, label_map_dict, annotations_dir,
image_dir, train_examples, faces_only=FLAGS.faces_only)
create_tf_record(val_output_path, label_map_dict, annotations_dir,
image_dir, val_examples, faces_only=FLAGS.faces_only)
create_tf_record(
train_output_path,
label_map_dict,
annotations_dir,
image_dir,
train_examples,
faces_only=FLAGS.faces_only,
mask_type=FLAGS.mask_type)
create_tf_record(
val_output_path,
label_map_dict,
annotations_dir,
image_dir,
val_examples,
faces_only=FLAGS.faces_only,
mask_type=FLAGS.mask_type)
if __name__ == '__main__':
......
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download and preprocess the MSCOCO data set for detection.
#
# The outputs of this script are TFRecord files containing serialized
# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
# the tf.Example protocol buffers are constructed and see
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
# bash object_detection/dataset_tools/download_and_preprocess_mscoco.sh \
# /tmp/mscoco
set -e
if [ -z "$1" ]; then
echo "usage download_and_preprocess_mscoco.sh [data dir]"
exit
fi
if [ "$(uname)" == "Darwin" ]; then
UNZIP="tar -xf"
else
UNZIP="unzip -nq"
fi
# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
CURRENT_DIR=$(pwd)
# Helper function to download and unpack a .zip file.
function download_and_unzip() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f ${FILENAME} ]; then
echo "Downloading ${FILENAME} to $(pwd)"
wget -nd -c "${BASE_URL}/${FILENAME}"
else
echo "Skipping download of ${FILENAME}"
fi
echo "Unzipping ${FILENAME}"
${UNZIP} ${FILENAME}
}
cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
VAL_IMAGE_FILE="val2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"
TEST_IMAGE_FILE="test2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"
# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2017.zip"
download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}
TRAIN_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
VAL_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
# Download the test image info.
BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
IMAGE_INFO_FILE="image_info_test2017.zip"
download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
# Build TFRecords of the image data.
cd "${CURRENT_DIR}"
python object_detection/dataset_tools/create_coco_tf_record.py \
--logtostderr \
--include_masks \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
......@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange
import tensorflow as tf
from object_detection.core import standard_fields
......@@ -103,7 +102,7 @@ def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in xrange(num_shards)
for idx in range(num_shards)
]
tfrecords = [
......
......@@ -48,9 +48,10 @@ import os
import tensorflow as tf
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import dataset_builder
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
......@@ -103,19 +104,20 @@ def main(unused_argv):
model_config = configs['model']
eval_config = configs['eval_config']
input_config = configs['eval_input_config']
if FLAGS.eval_training_data:
input_config = configs['train_input_config']
else:
input_config = configs['eval_input_config']
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
create_input_dict_fn = functools.partial(
input_reader_builder.build,
input_config)
def get_next(config):
return dataset_util.make_initializable_iterator(
dataset_builder.build(config)).get_next()
create_input_dict_fn = functools.partial(get_next, input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
......
......@@ -40,14 +40,13 @@ def write_metrics(metrics, global_step, summary_dir):
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer = tf.summary.FileWriterCache.get(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
summary_writer.close()
logging.info('Metrics written to tf summary.')
......@@ -60,8 +59,12 @@ def visualize_detection_results(result_dict,
export_dir='',
agnostic_mode=False,
show_groundtruth=False,
groundtruth_box_visualization_color='black',
min_score_thresh=.5,
max_num_predictions=20):
max_num_predictions=20,
skip_scores=False,
skip_labels=False,
keep_image_id_for_visualization_export=False):
"""Visualizes detection results and writes visualizations to image summaries.
This function visualizes an image with its detected bounding boxes and writes
......@@ -99,44 +102,57 @@ def visualize_detection_results(result_dict,
class-agnostic mode or not.
show_groundtruth: boolean (default: False) controlling whether to show
groundtruth boxes in addition to detected boxes
groundtruth_box_visualization_color: box color for visualizing groundtruth
boxes
min_score_thresh: minimum score threshold for a box to be visualized
max_num_predictions: maximum number of detections to visualize
skip_scores: whether to skip score when drawing a single detection
skip_labels: whether to skip label when drawing a single detection
keep_image_id_for_visualization_export: whether to keep image identifier in
filename when exported to export_dir
Raises:
ValueError: if result_dict does not contain the expected keys (i.e.,
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes')
"""
detection_fields = fields.DetectionResultFields
input_fields = fields.InputDataFields
if not set([
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes'
input_fields.original_image,
detection_fields.detection_boxes,
detection_fields.detection_scores,
detection_fields.detection_classes,
]).issubset(set(result_dict.keys())):
raise ValueError('result_dict does not contain all expected keys.')
if show_groundtruth and 'groundtruth_boxes' not in result_dict:
if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict['original_image'], axis=0)
detection_boxes = result_dict['detection_boxes']
detection_scores = result_dict['detection_scores']
detection_classes = np.int32((result_dict['detection_classes']))
detection_keypoints = result_dict.get('detection_keypoints', None)
detection_masks = result_dict.get('detection_masks', None)
image = np.squeeze(result_dict[input_fields.original_image], axis=0)
detection_boxes = result_dict[detection_fields.detection_boxes]
detection_scores = result_dict[detection_fields.detection_scores]
detection_classes = np.int32((result_dict[
detection_fields.detection_classes]))
detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
detection_masks = result_dict.get(detection_fields.detection_masks)
detection_boundaries = result_dict.get(detection_fields.detection_boundaries)
# Plot groundtruth underneath detections
if show_groundtruth:
groundtruth_boxes = result_dict['groundtruth_boxes']
groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
groundtruth_boxes,
None,
None,
category_index,
image=image,
boxes=groundtruth_boxes,
classes=None,
scores=None,
category_index=category_index,
keypoints=groundtruth_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=None)
max_boxes_to_draw=None,
groundtruth_box_visualization_color=groundtruth_box_visualization_color)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
detection_boxes,
......@@ -144,13 +160,22 @@ def visualize_detection_results(result_dict,
detection_scores,
category_index,
instance_masks=detection_masks,
instance_boundaries=detection_boundaries,
keypoints=detection_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=max_num_predictions,
min_score_thresh=min_score_thresh,
agnostic_mode=agnostic_mode)
agnostic_mode=agnostic_mode,
skip_scores=skip_scores,
skip_labels=skip_labels)
if export_dir:
if keep_image_id_for_visualization_export and result_dict[fields.InputDataFields().key]:
export_path = os.path.join(export_dir, 'export-{}-{}.png'.format(
tag, result_dict[fields.InputDataFields().key]))
else:
export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
vis_utils.save_image_array_as_png(image, export_path)
......@@ -161,9 +186,8 @@ def visualize_detection_results(result_dict,
encoded_image_string=vis_utils.encode_image_array_as_png_str(
image)))
])
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer = tf.summary.FileWriterCache.get(summary_dir)
summary_writer.add_summary(summary, global_step)
summary_writer.close()
logging.info('Detection visualizations written to summary with tag %s.', tag)
......@@ -260,8 +284,10 @@ def _run_checkpoint_once(tensor_dict,
result_dict = {}
else:
result_dict = batch_processor(tensor_dict, sess, batch, counters)
if not result_dict:
continue
for evaluator in evaluators:
# TODO: Use image_id tensor once we fix the input data
# TODO(b/65130867): Use image_id tensor once we fix the input data
# decoders to return correct image_id.
# TODO: result_dict contains batches of images, while
# add_single_ground_truth_image_info expects a single image. Fix
......@@ -422,9 +448,9 @@ def result_dict_for_single_example(image,
(Optional).
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes, masks, keypoints should
be scaled to absolute coordinates. Note that for IoU based evaluations,
it does not matter whether boxes are expressed in absolute or relative
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
scaled to absolute coordinates. Note that for IoU based evaluations, it
does not matter whether boxes are expressed in absolute or relative
coordinates. Default False.
Returns:
......@@ -436,8 +462,8 @@ def result_dict_for_single_example(image,
`scale_to_absolute`.
'detection_scores': [max_detections] float32 tensor of scores.
'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
'detection_masks': [max_detections, None, None] float32 tensor of binarized
masks. (Only present if available in `detections`)
'detection_masks': [max_detections, H, W] float32 tensor of binarized
masks, reframed to full image masks.
'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
normalized or absolute coordinates, depending on the value of
`scale_to_absolute`. (Optional)
......@@ -481,14 +507,17 @@ def result_dict_for_single_example(image,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks][0]
output_dict[detection_fields.detection_masks] = detection_masks
if scale_to_absolute:
# TODO: This should be done in model's postprocess
# function ideally.
num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
detection_boxes = tf.slice(
detection_boxes, begin=[0, 0], size=[num_detections, -1])
detection_masks = tf.slice(
detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[1], image_shape[2])
detection_masks_reframed = tf.to_float(
tf.greater(detection_masks_reframed, 0.5))
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
output_dict[detection_fields.detection_masks] = detection_masks_reframed
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints][0]
......@@ -500,6 +529,9 @@ def result_dict_for_single_example(image,
absolute_detection_keypoints)
if groundtruth:
if input_data_fields.groundtruth_instance_masks in groundtruth:
groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)
output_dict.update(groundtruth)
if scale_to_absolute:
groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
......
......@@ -24,20 +24,31 @@ import tensorflow as tf
from object_detection import eval_util
from object_detection.core import prefetcher
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import object_detection_evaluation
# A dictionary of metric names to classes that implement the metric. The classes
# in the dictionary must implement
# utils.object_detection_evaluation.DetectionEvaluator interface.
EVAL_METRICS_CLASS_DICT = {
'pascal_voc_metrics':
'pascal_voc_detection_metrics':
object_detection_evaluation.PascalDetectionEvaluator,
'weighted_pascal_voc_metrics':
'weighted_pascal_voc_detection_metrics':
object_detection_evaluation.WeightedPascalDetectionEvaluator,
'open_images_metrics':
object_detection_evaluation.OpenImagesDetectionEvaluator
'pascal_voc_instance_segmentation_metrics':
object_detection_evaluation.PascalInstanceSegmentationEvaluator,
'weighted_pascal_voc_instance_segmentation_metrics':
object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
'open_images_detection_metrics':
object_detection_evaluation.OpenImagesDetectionEvaluator,
'coco_detection_metrics':
coco_evaluation.CocoDetectionEvaluator,
'coco_mask_metrics':
coco_evaluation.CocoMaskEvaluator,
}
EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics'
def _extract_prediction_tensors(model,
create_input_dict_fn,
......@@ -56,9 +67,10 @@ def _extract_prediction_tensors(model,
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image = model.preprocess(tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image)
detections = model.postprocess(prediction_dict)
preprocessed_image, true_image_shapes = model.preprocess(
tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image, true_image_shapes)
detections = model.postprocess(prediction_dict, true_image_shapes)
groundtruth = None
if not ignore_groundtruth:
......@@ -103,17 +115,20 @@ def get_evaluators(eval_config, categories):
Raises:
ValueError: if metric is not in the metric class dictionary.
"""
eval_metric_fn_key = eval_config.metrics_set
eval_metric_fn_keys = eval_config.metrics_set
if not eval_metric_fn_keys:
eval_metric_fn_keys = [EVAL_DEFAULT_METRIC]
evaluators_list = []
for eval_metric_fn_key in eval_metric_fn_keys:
if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
return [
EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](
categories=categories)
]
evaluators_list.append(
EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories))
return evaluators_list
def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
checkpoint_dir, eval_dir):
checkpoint_dir, eval_dir, graph_hook_fn=None):
"""Evaluation function for detection models.
Args:
......@@ -124,6 +139,10 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
have an integer 'id' field and string 'name' field.
checkpoint_dir: directory to load the checkpoints to evaluate from.
eval_dir: directory to write evaluation metrics summary to.
graph_hook_fn: Optional function that is called after the training graph is
completely built. This is helpful to perform additional changes to the
training graph such as optimizing batchnorm. The function should modify
the default graph.
Returns:
metrics: A dictionary containing metric names and values from the latest
......@@ -177,12 +196,23 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
categories=categories,
summary_dir=eval_dir,
export_dir=eval_config.visualization_export_dir,
show_groundtruth=eval_config.visualization_export_dir)
show_groundtruth=eval_config.visualize_groundtruth_boxes,
groundtruth_box_visualization_color=eval_config.groundtruth_box_visualization_color,
min_score_thresh=eval_config.min_score_threshold,
max_num_predictions=eval_config.max_num_boxes_to_visualize,
skip_scores=eval_config.skip_scores,
skip_labels=eval_config.skip_labels,
keep_image_id_for_visualization_export=eval_config.keep_image_id_for_visualization_export)
return result_dict
variables_to_restore = tf.global_variables()
global_step = tf.train.get_or_create_global_step()
variables_to_restore.append(global_step)
if graph_hook_fn: graph_hook_fn()
if eval_config.use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
......
......@@ -65,6 +65,31 @@ with contents:
- model.ckpt.meta
- frozen_inference_graph.pb
+ saved_model (a directory)
Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path. These are useful for
making small changes to the inference graph that differ from the training or
eval config.
Example Usage (in which we change the second stage post-processing score
threshold to be 0.5):
python export_inference_graph \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory \
--config_override " \
model{ \
faster_rcnn { \
second_stage_post_processing { \
batch_non_max_suppression { \
score_threshold: 0.5 \
} \
} \
} \
}"
"""
import tensorflow as tf
from google.protobuf import text_format
......@@ -92,7 +117,9 @@ flags.DEFINE_string('trained_checkpoint_prefix', None,
'Path to trained checkpoint, typically of the form '
'path/to/model.ckpt')
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string('config_override', '',
'pipeline_pb2.TrainEvalPipelineConfig '
'text proto to override pipeline_config_path.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
......@@ -103,6 +130,7 @@ def main(_):
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
text_format.Merge(FLAGS.config_override, pipeline_config)
if FLAGS.input_shape:
input_shape = [
int(dim) if dim != '-1' else None
......
......@@ -18,7 +18,8 @@ import logging
import os
import tempfile
import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2
from google.protobuf import text_format
from tensorflow.core.protobuf import saver_pb2
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
......@@ -43,7 +44,6 @@ def freeze_graph_with_def_protos(
filename_tensor_name,
clear_devices,
initializer_nodes,
optimize_graph=True,
variable_names_blacklist=''):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
......@@ -65,20 +65,7 @@ def freeze_graph_with_def_protos(
with tf.Graph().as_default():
tf.import_graph_def(input_graph_def, name='')
if optimize_graph:
logging.info('Graph Rewriter optimizations enabled')
rewrite_options = rewriter_config_pb2.RewriterConfig(
layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
rewrite_options.optimizers.append('pruning')
rewrite_options.optimizers.append('constfold')
rewrite_options.optimizers.append('layout')
graph_options = tf.GraphOptions(
rewrite_options=rewrite_options, infer_shapes=True)
else:
logging.info('Graph Rewriter optimizations disabled')
graph_options = tf.GraphOptions()
config = tf.ConfigProto(graph_options=graph_options)
config = tf.ConfigProto(graph_options=tf.GraphOptions())
with session.Session(config=config) as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
......@@ -227,23 +214,31 @@ def _add_output_tensor_nodes(postprocessed_tensors,
Returns:
A tensor dict containing the added output tensor nodes.
"""
detection_fields = fields.DetectionResultFields
label_id_offset = 1
boxes = postprocessed_tensors.get('detection_boxes')
scores = postprocessed_tensors.get('detection_scores')
classes = postprocessed_tensors.get('detection_classes') + label_id_offset
masks = postprocessed_tensors.get('detection_masks')
num_detections = postprocessed_tensors.get('num_detections')
boxes = postprocessed_tensors.get(detection_fields.detection_boxes)
scores = postprocessed_tensors.get(detection_fields.detection_scores)
classes = postprocessed_tensors.get(
detection_fields.detection_classes) + label_id_offset
masks = postprocessed_tensors.get(detection_fields.detection_masks)
num_detections = postprocessed_tensors.get(detection_fields.num_detections)
outputs = {}
outputs['detection_boxes'] = tf.identity(boxes, name='detection_boxes')
outputs['detection_scores'] = tf.identity(scores, name='detection_scores')
outputs['detection_classes'] = tf.identity(classes, name='detection_classes')
outputs['num_detections'] = tf.identity(num_detections, name='num_detections')
outputs[detection_fields.detection_boxes] = tf.identity(
boxes, name=detection_fields.detection_boxes)
outputs[detection_fields.detection_scores] = tf.identity(
scores, name=detection_fields.detection_scores)
outputs[detection_fields.detection_classes] = tf.identity(
classes, name=detection_fields.detection_classes)
outputs[detection_fields.num_detections] = tf.identity(
num_detections, name=detection_fields.num_detections)
if masks is not None:
outputs['detection_masks'] = tf.identity(masks, name='detection_masks')
outputs[detection_fields.detection_masks] = tf.identity(
masks, name=detection_fields.detection_masks)
for output_key in outputs:
tf.add_to_collection(output_collection_name, outputs[output_key])
if masks is not None:
tf.add_to_collection(output_collection_name, outputs['detection_masks'])
tf.add_to_collection(output_collection_name,
outputs[detection_fields.detection_masks])
return outputs
......@@ -328,8 +323,8 @@ def _export_inference_graph(input_type,
output_directory,
additional_output_tensor_names=None,
input_shape=None,
optimize_graph=True,
output_collection_name='inference_op'):
output_collection_name='inference_op',
graph_hook_fn=None):
"""Export helper."""
tf.gfile.MakeDirs(output_directory)
frozen_graph_path = os.path.join(output_directory,
......@@ -348,24 +343,34 @@ def _export_inference_graph(input_type,
placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type](
**placeholder_args)
inputs = tf.to_float(input_tensors)
preprocessed_inputs = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(preprocessed_inputs)
postprocessed_tensors = detection_model.postprocess(output_tensors)
preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(
preprocessed_inputs, true_image_shapes)
postprocessed_tensors = detection_model.postprocess(
output_tensors, true_image_shapes)
outputs = _add_output_tensor_nodes(postprocessed_tensors,
output_collection_name)
# Add global step to the graph.
slim.get_or_create_global_step()
if graph_hook_fn: graph_hook_fn()
saver_kwargs = {}
if use_moving_averages:
temp_checkpoint_file = tempfile.NamedTemporaryFile()
# This check is to be compatible with both version of SaverDef.
if os.path.isfile(trained_checkpoint_prefix):
saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
temp_checkpoint_prefix = tempfile.NamedTemporaryFile().name
else:
temp_checkpoint_prefix = tempfile.mkdtemp()
replace_variable_values_with_moving_averages(
tf.get_default_graph(), trained_checkpoint_prefix,
temp_checkpoint_file.name)
checkpoint_to_use = temp_checkpoint_file.name
temp_checkpoint_prefix)
checkpoint_to_use = temp_checkpoint_prefix
else:
checkpoint_to_use = trained_checkpoint_prefix
saver = tf.train.Saver()
saver = tf.train.Saver(**saver_kwargs)
input_saver_def = saver.as_saver_def()
_write_graph_and_checkpoint(
......@@ -387,7 +392,6 @@ def _export_inference_graph(input_type,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
optimize_graph=optimize_graph,
initializer_nodes='')
_write_frozen_graph(frozen_graph_path, frozen_graph_def)
_write_saved_model(saved_model_path, frozen_graph_def,
......@@ -399,7 +403,6 @@ def export_inference_graph(input_type,
trained_checkpoint_prefix,
output_directory,
input_shape=None,
optimize_graph=True,
output_collection_name='inference_op',
additional_output_tensor_names=None):
"""Exports inference graph for the model specified in the pipeline config.
......@@ -412,7 +415,6 @@ def export_inference_graph(input_type,
output_directory: Path to write outputs.
input_shape: Sets a fixed shape for an `image_tensor` input. If not
specified, will default to [None, None, None, 3].
optimize_graph: Whether to optimize graph using Grappler.
output_collection_name: Name of collection to add output tensors to.
If None, does not add output tensors to a collection.
additional_output_tensor_names: list of additional output
......@@ -424,4 +426,10 @@ def export_inference_graph(input_type,
pipeline_config.eval_config.use_moving_averages,
trained_checkpoint_prefix,
output_directory, additional_output_tensor_names,
input_shape, optimize_graph, output_collection_name)
input_shape, output_collection_name,
graph_hook_fn=None)
pipeline_config.eval_config.use_moving_averages = False
config_text = text_format.MessageToString(pipeline_config)
with tf.gfile.Open(
os.path.join(output_directory, 'pipeline.config'), 'wb') as f:
f.write(config_text)
......@@ -18,6 +18,7 @@ import os
import numpy as np
import six
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
......@@ -37,12 +38,13 @@ class FakeModel(model.DetectionModel):
self._add_detection_masks = add_detection_masks
def preprocess(self, inputs):
return tf.identity(inputs)
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs):
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict):
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(prediction_dict.values()):
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5],
......@@ -63,7 +65,7 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def loss(self, prediction_dict):
def loss(self, prediction_dict, true_image_shapes):
pass
......@@ -74,10 +76,10 @@ class ExportInferenceGraphTest(tf.test.TestCase):
g = tf.Graph()
with g.as_default():
mock_model = FakeModel()
preprocessed_inputs = mock_model.preprocess(
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs)
mock_model.postprocess(predictions)
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
if use_moving_averages:
tf.train.ExponentialMovingAverage(0.0).apply()
slim.get_or_create_global_step()
......@@ -213,10 +215,10 @@ class ExportInferenceGraphTest(tf.test.TestCase):
graph = tf.Graph()
with graph.as_default():
fake_model = FakeModel()
preprocessed_inputs = fake_model.preprocess(
preprocessed_inputs, true_image_shapes = fake_model.preprocess(
tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3]))
predictions = fake_model.predict(preprocessed_inputs)
fake_model.postprocess(predictions)
predictions = fake_model.predict(preprocessed_inputs, true_image_shapes)
fake_model.postprocess(predictions, true_image_shapes)
exporter.replace_variable_values_with_moving_averages(
graph, trained_checkpoint_prefix, new_checkpoint_prefix)
......@@ -448,7 +450,7 @@ class ExportInferenceGraphTest(tf.test.TestCase):
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
'^TensorArray has inconsistent shapes.'):
'TensorArray.*shape'):
sess.run([boxes, scores, classes, masks, num_detections],
feed_dict={image_str_tensor: image_str_batch_np})
......@@ -495,6 +497,31 @@ class ExportInferenceGraphTest(tf.test.TestCase):
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def test_export_graph_saves_pipeline_file(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
expected_pipeline_path = os.path.join(
output_directory, 'pipeline.config')
self.assertTrue(os.path.exists(expected_pipeline_path))
written_pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(expected_pipeline_path, 'r') as f:
proto_str = f.read()
text_format.Merge(proto_str, written_pipeline_config)
self.assertProtoEquals(pipeline_config, written_pipeline_config)
def test_export_saved_model_and_run_inference(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
......
......@@ -23,7 +23,7 @@ In the table below, we list each such pre-trained model including:
* detector performance on a subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
* Output types (currently only `Boxes`)
* Output types (`Boxes`, and `Masks` if applicable)
You can un-tar each tar.gz file via, e.g.,:
......@@ -55,7 +55,7 @@ Some remarks on frozen inference graphs:
a detector (and discarding the part past that point), which negatively impacts
standard mAP metrics.
* Our frozen inference graphs are generated using the
[v1.4.0](https://github.com/tensorflow/tensorflow/tree/v1.4.0)
[v1.5.0](https://github.com/tensorflow/tensorflow/tree/v1.5.0)
release version of Tensorflow and we do not guarantee that these will work
with other versions; this being said, each frozen inference graph can be
regenerated using your current version of Tensorflow by re-running the
......@@ -69,16 +69,20 @@ Some remarks on frozen inference graphs:
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2017_11_08.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2017_11_08.tar.gz) | 89 | 30 | Boxes |
| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2017_11_08.tar.gz) | 64 | | Boxes |
| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2017_11_08.tar.gz) | 92 | 30 | Boxes |
| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2017_11_08.tar.gz) | 106 | 32 | Boxes |
| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2017_11_08.tar.gz) | 82 | | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2017_11_08.tar.gz) | 620 | 37 | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2017_11_08.tar.gz) | 241 | | Boxes |
| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2017_11_08.tar.gz) | 1833 | 43 | Boxes |
| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2017_11_08.tar.gz) | 540 | | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes |
| [faster_rcnn_resnet50_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz) | 64 | | Boxes |
| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2018_01_28.tar.gz) | 92 | 30 | Boxes |
| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz) | 106 | 32 | Boxes |
| [faster_rcnn_resnet101_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz) | 82 | | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 620 | 37 | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz) | 241 | | Boxes |
| [faster_rcnn_nas](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz) | 1833 | 43 | Boxes |
| [faster_rcnn_nas_lowproposals_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz) | 540 | | Boxes |
| [mask_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz) | 771 | 36 | Masks |
| [mask_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 79 | 25 | Masks |
| [mask_rcnn_resnet101_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz) | 470 | 33 | Masks |
| [mask_rcnn_resnet50_atrous_coco](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz) | 343 | 29 | Masks |
......@@ -86,14 +90,14 @@ Some remarks on frozen inference graphs:
Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2017_11_08.tar.gz) | 79 | 87 | Boxes
[faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz) | 79 | 87 | Boxes
## Open Images-trained models {#open-images-models}
Model name | Speed (ms) | Open Images mAP@0.5[^2] | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2017_11_08.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2017_11_08.tar.gz) | 347 | | Boxes
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
[^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
......
......@@ -4,17 +4,16 @@ The Tensorflow Object Detection API currently supports three evaluation protocol
that can be configured in `EvalConfig` by setting `metrics_set` to the
corresponding value.
## PASCAL VOC 2007 metric
## PASCAL VOC 2007 detection metric
`EvalConfig.metrics_set='pascal_voc_metrics'`
`EvalConfig.metrics_set='pascal_voc_detection_metrics'`
The commonly used mAP metric for evaluating the quality of object detectors, computed according to the protocol of the PASCAL VOC Challenge 2007.
The protocol is available [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/devkit_doc_07-Jun-2007.pdf).
## Weighted PASCAL VOC detection metric
## Weighted PASCAL VOC metric
`EvalConfig.metrics_set='weighted_pascal_voc_metrics'`
`EvalConfig.metrics_set='weighted_pascal_voc_detection_metrics'`
The weighted PASCAL metric computes the mean average precision as the average
precision when treating all classes as a single class. In comparison,
......@@ -25,7 +24,21 @@ For example, the test set consists of two classes, "cat" and "dog", and there ar
According to PASCAL VOC 2007 metric, performance on each of the two classes would contribute equally towards the final mAP value,
while for the Weighted PASCAL VOC metric the final mAP value will be influenced by frequency of each class.
## Open Images metric {#open-images}
## PASCAL VOC 2007 instance segmentation metric
`EvalConfig.metrics_set='pascal_voc_instance_segmentation_metrics'`
Similar to the PASCAL VOC 2007 detection metric, but computes the intersection over
union based on the object masks instead of object boxes.
## Weighted PASCAL VOC instance segmentation metric
`EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`
Similar to the weighted PASCAL VOC 2007 detection metric, but computes the
intersection over union based on the object masks instead of object boxes.
## Open Images detection metric {#open-images}
`EvalConfig.metrics_set='open_images_metrics'`
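For reference, these metric names are selected through the repeated `metrics_set` field of `EvalConfig`. A minimal sketch of requesting several of them programmatically, assuming the standard `object_detection.protos.eval_pb2` module:

```python
# Sketch: request evaluation metrics by name via EvalConfig.metrics_set.
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
# metrics_set is repeated, so multiple metrics can be evaluated in one run.
eval_config.metrics_set.append('coco_detection_metrics')
eval_config.metrics_set.append('coco_mask_metrics')

# Equivalent lines in a pipeline config text proto:
#   eval_config {
#     metrics_set: 'coco_detection_metrics'
#     metrics_set: 'coco_mask_metrics'
#   }
```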
......
......@@ -8,7 +8,7 @@ graph proto. A checkpoint will typically consist of three files:
* model.ckpt-${CHECKPOINT_NUMBER}.meta
After you've identified a candidate checkpoint to export, run the following
command from tensorflow/models/research/:
command from tensorflow/models/research:
``` bash
# From tensorflow/models/research/
......
## Run an Instance Segmentation Model
For some applications it isn't sufficient to localize an object with a
simple bounding box. For instance, you might want to segment an object region
once it is detected. This class of problems is called **instance segmentation**.
<p align="center">
<img src="img/kites_with_segment_overlay.png" width=676 height=450>
</p>
### Materializing data for instance segmentation {#materializing-instance-seg}
Instance segmentation is an extension of object detection, where a binary mask
(i.e. object vs. background) is associated with every bounding box. This allows
for more fine-grained information about the extent of the object within the box.
To train an instance segmentation model, a groundtruth mask must be supplied for
every groundtruth bounding box. In addition to the proto fields listed in the
section titled [Using your own dataset](using_your_own_dataset.md), one must
also supply `image/object/mask`, which can either be a repeated list of
single-channel encoded PNG strings, or a single dense 3D binary tensor where
masks corresponding to each object are stacked along the first dimension. Each
is described in more detail below.
#### PNG Instance Segmentation Masks
Instance segmentation masks can be supplied as serialized PNG images.
```shell
image/object/mask = ["\x89PNG\r\n\x1A\n\x00\x00\x00\rIHDR\...", ...]
```
These masks are whole-image masks, one for each object instance. The spatial
dimensions of each mask must agree with the image. Each mask has only a single
channel, and the pixel values are either 0 (background) or 1 (object mask).
**PNG masks are the preferred parameterization since they offer considerable
space savings compared to dense numerical masks.**
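As an illustration of how such a PNG string can be produced (mirroring the `mask_type='png'` branch in `create_pet_tf_record.py` above), here is a short sketch; the small mask array is made up for the example.

```python
# Sketch: encode a binary instance mask as a single-channel PNG string for
# the image/object/mask feature. The 4x6 mask below is made up.
import io

import numpy as np
import PIL.Image

from object_detection.utils import dataset_util

mask = np.zeros((4, 6), dtype=np.uint8)  # 0 = background
mask[1:3, 2:5] = 1                       # 1 = object

output = io.BytesIO()
PIL.Image.fromarray(mask).save(output, format='PNG')
encoded_mask_png = output.getvalue()

feature = dataset_util.bytes_list_feature([encoded_mask_png])
```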
#### Dense Numerical Instance Segmentation Masks
Masks can also be specified via a dense numerical tensor.
```shell
image/object/mask = [0.0, 0.0, 1.0, 1.0, 0.0, ...]
```
For an image with dimensions `H` x `W` and `num_boxes` groundtruth boxes, the
mask corresponds to a [`num_boxes`, `H`, `W`] float32 tensor, flattened into a
single vector of shape `num_boxes` * `H` * `W`. In TensorFlow, examples are read
in row-major format, so the elements are organized as:
```shell
... mask 0 row 0 ... mask 0 row 1 ... // ... mask 0 row H-1 ... mask 1 row 0 ...
```
where each row has W contiguous binary values.
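To make the layout concrete, the following sketch (with two made-up 2x3 masks) produces the flattened float list described above, following the `mask_type='numerical'` branch in `create_pet_tf_record.py`:

```python
# Sketch: flatten [num_boxes, H, W] binary masks into the dense float list
# stored under image/object/mask. The two 2x3 masks here are made up.
import numpy as np

from object_detection.utils import dataset_util

masks = [
    np.array([[0, 1, 1],
              [0, 0, 1]], dtype=np.uint8),
    np.array([[1, 1, 0],
              [1, 0, 0]], dtype=np.uint8),
]

# Stack to [num_boxes, H, W], then flatten row-major:
# mask 0 row 0, mask 0 row 1, ..., mask 1 row 0, ...
mask_stack = np.stack(masks).astype(np.float32)
masks_flattened = np.reshape(mask_stack, [-1])

feature = dataset_util.float_list_feature(masks_flattened.tolist())
```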
To see an example tf-records with mask labels, see the examples under the
[Preparing Inputs](preparing_inputs.md) section.
### Pre-existing config files
We provide four instance segmentation config files that you can use to train
your own models:
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_inception_resnet_v2_atrous_coco.config" target=_blank>mask_rcnn_inception_resnet_v2_atrous_coco</a>
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_resnet101_atrous_coco.config" target=_blank>mask_rcnn_resnet101_atrous_coco</a>
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_resnet50_atrous_coco.config" target=_blank>mask_rcnn_resnet50_atrous_coco</a>
1. <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/mask_rcnn_inception_v2_coco.config" target=_blank>mask_rcnn_inception_v2_coco</a>
For more details see the [detection model zoo](detection_model_zoo.md).
### Updating a Faster R-CNN config file
Currently, the only supported instance segmentation model is [Mask
R-CNN](https://arxiv.org/abs/1703.06870), which requires Faster R-CNN as the
backbone object detector.
Once you have a baseline Faster R-CNN pipeline configuration, you can make the
following modifications in order to convert it into a Mask R-CNN model (a
programmatic sketch of these edits follows the list):
1. Within `train_input_reader` and `eval_input_reader`, set
`load_instance_masks` to `True`. If using PNG masks, set `mask_type` to
`PNG_MASKS`, otherwise you can leave it as the default `NUMERICAL_MASKS`.
1. Within the `faster_rcnn` config, use a `MaskRCNNBoxPredictor` as the
`second_stage_box_predictor`.
1. Within the `MaskRCNNBoxPredictor` message, set `predict_instance_masks` to
`True`. You must also define `conv_hyperparams`.
1. Within the `faster_rcnn` message, set `number_of_stages` to `3`.
1. Add instance segmentation metrics to the set of metrics:
`'coco_mask_metrics'`.
1. Update the `input_path`s to point at your data.
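A possible sketch of applying these modifications programmatically is shown below. The config paths are placeholders, and the proto accessors (`input_reader_pb2.PNG_MASKS`, `mask_rcnn_box_predictor`, `number_of_stages`) are assumptions based on the standard object_detection protos rather than something this page defines.

```python
# Sketch: convert a baseline Faster R-CNN pipeline config into a Mask R-CNN
# config following the steps above. Paths are placeholders.
import tensorflow as tf
from google.protobuf import text_format

from object_detection.protos import input_reader_pb2
from object_detection.protos import pipeline_pb2

pipeline = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('path/to/faster_rcnn_baseline.config', 'r') as f:
  text_format.Merge(f.read(), pipeline)

# 1. Load instance masks (stored as PNGs) in both input readers.
for reader in (pipeline.train_input_reader, pipeline.eval_input_reader):
  reader.load_instance_masks = True
  reader.mask_type = input_reader_pb2.PNG_MASKS

# 2./3. Use a MaskRCNNBoxPredictor that predicts instance masks.
#       (conv_hyperparams must also be defined; omitted here.)
predictor = pipeline.model.faster_rcnn.second_stage_box_predictor
predictor.mask_rcnn_box_predictor.predict_instance_masks = True

# 4. Enable the third (mask) stage.
pipeline.model.faster_rcnn.number_of_stages = 3

# 5. Add instance segmentation metrics.
pipeline.eval_config.metrics_set.append('coco_mask_metrics')

with tf.gfile.GFile('path/to/mask_rcnn_pipeline.config', 'w') as f:
  f.write(text_format.MessageToString(pipeline))
```

The same edits can of course be made by hand in the pipeline config text file, as described in the list above.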
Please refer to the section on [Running the pets dataset](running_pets.md) for
additional details.
> Note: The mask prediction branch consists of a sequence of convolution layers.
> You can set the number of convolution layers and their depth as follows:
>
> 1. Within the `MaskRCNNBoxPredictor` message, set the
> `mask_prediction_conv_depth` to your value of interest. The default value
> is 256. If you set it to `0` (recommended), the depth is computed
> automatically based on the number of classes in the dataset.
> 1. Within the `MaskRCNNBoxPredictor` message, set the
> `mask_prediction_num_conv_layers` to your value of interest. The default
> value is 2.
......@@ -308,6 +308,21 @@ python object_detection/export_inference_graph.py \
Afterwards, you should see a directory named `exported_graphs` containing the
SavedModel and frozen graph.
## Configuring the Instance Segmentation Pipeline
Mask prediction can be turned on for an object detection config by adding
`predict_instance_masks: true` within the `MaskRCNNBoxPredictor`. Other
parameters, such as the mask size, the number of convolutions in the mask layer,
and the convolution hyperparameters, can also be defined. We will use
`mask_rcnn_resnet101_pets.config` as a starting point for configuring the
instance segmentation pipeline. Everything mentioned above about object
detection also holds true for instance segmentation. Setting training and other
details aside, an instance segmentation model is simply an object detection
model with an additional head that predicts the object mask inside each
predicted box.
Please refer to the section on [Running an Instance Segmentation
Model](instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
## What's Next
Congratulations, you have now trained an object detector for various cats and
......
......@@ -103,7 +103,7 @@ FLAGS = flags.FLAGS
def create_tf_example(example):
# TODO(user): Populate the following variables from your example.
# TODO: Populate the following variables from your example.
height = None # Image height
width = None # Image width
filename = None # Filename of the image. Empty if image is not from file
......@@ -139,7 +139,7 @@ def create_tf_example(example):
def main(_):
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
# TODO(user): Write code to read in your dataset to examples variable
# TODO: Write code to read in your dataset to examples variable
for example in examples:
tf_example = create_tf_example(example)
......@@ -155,3 +155,7 @@ if __name__ == '__main__':
Note: You may notice additional fields in some other datasets. They are
currently unused by the API and are optional.
Note: Please refer to the section on [Running an Instance Segmentation
Model](instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
......@@ -13,7 +13,7 @@ py_library(
srcs = ["detection_inference.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -22,11 +22,11 @@ py_test(
srcs = ["detection_inference_test.py"],
deps = [
":detection_inference",
"//third_party/py/PIL:pil",
"//third_party/py/numpy",
"//PIL:pil",
"//numpy",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/utils:dataset_util",
],
)
......
......@@ -17,6 +17,7 @@ r"""Tests for detection_inference.py."""
import os
import StringIO
import numpy as np
from PIL import Image
import tensorflow as tf
......