add flag for saving images to summary; strings moved to common.py'

b3247557 · Dheera Venkatraman · 75c931fd · 2041d5ca · b3247557 · b3247557
Commit b3247557 authored Apr 04, 2018 by Dheera Venkatraman
20 changed files
--- a/research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor_test.py
@@ -44,7 +44,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [4, 7, 7, 1024])
+      self.assertAllEqual(features_shape_out, [4, 14, 14, 512])

  def test_extract_proposal_features_stride_eight(self):
    feature_extractor = self._build_feature_extractor(
@@ -59,7 +59,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [4, 7, 7, 1024])
+      self.assertAllEqual(features_shape_out, [4, 14, 14, 512])

  def test_extract_proposal_features_half_size_input(self):
    feature_extractor = self._build_feature_extractor(
@@ -74,7 +74,7 @@ class FasterRcnnMobilenetV1FeatureExtractorTest(tf.test.TestCase):
    with self.test_session() as sess:
      sess.run(init_op)
      features_shape_out = sess.run(features_shape)
-      self.assertAllEqual(features_shape_out, [1, 4, 4, 1024])
+      self.assertAllEqual(features_shape_out, [1, 7, 7, 512])

  def test_extract_proposal_features_dies_on_invalid_stride(self):
    with self.assertRaises(ValueError):

--- a/research/object_detection/models/faster_rcnn_pnas_feature_extractor.py
+++ b/research/object_detection/models/faster_rcnn_pnas_feature_extractor.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""PNASNet Faster R-CNN implementation.
+
+Based on PNASNet model: https://arxiv.org/abs/1712.00559
+"""
+
+import tensorflow as tf
+
+from object_detection.meta_architectures import faster_rcnn_meta_arch
+from nets.nasnet import nasnet_utils
+from nets.nasnet import pnasnet
+
+arg_scope = tf.contrib.framework.arg_scope
+slim = tf.contrib.slim
+
+
+def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
+  """Defines the default arg scope for the PNASNet Large for object detection.
+
+  This provides a small edit to switch batch norm training on and off.
+
+  Args:
+    is_batch_norm_training: Boolean indicating whether to train with batch norm.
+
+  Returns:
+    An `arg_scope` to use for the PNASNet Large Model.
+  """
+  imagenet_scope = pnasnet.pnasnet_large_arg_scope()
+  with arg_scope(imagenet_scope):
+    with arg_scope([slim.batch_norm], is_training=is_batch_norm_training) as sc:
+      return sc
+
+
+def _filter_scaling(reduction_indices, start_cell_num):
+  """Compute the expected filter scaling at given PNASNet cell start_cell_num.
+
+  In the pnasnet.py code, filter_scaling starts at 1.0. We instead
+  adapt filter scaling to depend on the starting cell.
+  At first cells, before any reduction, filter_scalling is 1.0. With passing
+  any reduction cell, the filter_scaling is multiplied by 2.
+
+  Args:
+    reduction_indices: list of int indices.
+    start_cell_num: int.
+  Returns:
+    filter_scaling: float.
+  """
+  filter_scaling = 1.0
+  for ind in reduction_indices:
+    if ind < start_cell_num:
+      filter_scaling *= 2.0
+  return filter_scaling
+
+
+# Note: This is largely a copy of _build_pnasnet_base inside pnasnet.py but
+# with special edits to remove instantiation of the stem and the special
+# ability to receive as input a pair of hidden states. It constructs only
+# a sub-network from the original PNASNet model, starting from the
+# start_cell_num cell and with modified final layer.
+def _build_pnasnet_base(
+    hidden_previous, hidden, normal_cell, hparams, true_cell_num,
+    start_cell_num):
+  """Constructs a PNASNet image model for proposal classifier features."""
+
+  # Find where to place the reduction cells or stride normal cells
+  reduction_indices = nasnet_utils.calc_reduction_layers(
+      hparams.num_cells, hparams.num_reduction_layers)
+  filter_scaling = _filter_scaling(reduction_indices, start_cell_num)
+
+  # Note: The None is prepended to match the behavior of _imagenet_stem()
+  cell_outputs = [None, hidden_previous, hidden]
+  net = hidden
+
+  # Run the cells
+  for cell_num in range(start_cell_num, hparams.num_cells):
+    is_reduction = cell_num in reduction_indices
+    stride = 2 if is_reduction else 1
+    if is_reduction: filter_scaling *= hparams.filter_scaling_rate
+    prev_layer = cell_outputs[-2]
+    net = normal_cell(
+        net,
+        scope='cell_{}'.format(cell_num),
+        filter_scaling=filter_scaling,
+        stride=stride,
+        prev_layer=prev_layer,
+        cell_num=true_cell_num)
+    true_cell_num += 1
+    cell_outputs.append(net)
+
+  # Final nonlinearity.
+  # Note that we have dropped the final pooling, dropout and softmax layers
+  # from the default pnasnet version.
+  with tf.variable_scope('final_layer'):
+    net = tf.nn.relu(net)
+  return net
+
+
+# TODO(shlens): Only fixed_shape_resizer is currently supported for PNASNet
+# featurization. The reason for this is that pnasnet.py only supports
+# inputs with fully known shapes. We need to update pnasnet.py to handle
+# shapes not known at compile time.
+class FasterRCNNPNASFeatureExtractor(
+    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
+  """Faster R-CNN with PNASNet feature extractor implementation."""
+
+  def __init__(self,
+               is_training,
+               first_stage_features_stride,
+               batch_norm_trainable=False,
+               reuse_weights=None,
+               weight_decay=0.0):
+    """Constructor.
+
+    Args:
+      is_training: See base class.
+      first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
+      reuse_weights: See base class.
+      weight_decay: See base class.
+
+    Raises:
+      ValueError: If `first_stage_features_stride` is not 16.
+    """
+    if first_stage_features_stride != 16:
+      raise ValueError('`first_stage_features_stride` must be 16.')
+    super(FasterRCNNPNASFeatureExtractor, self).__init__(
+        is_training, first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)
+
+  def preprocess(self, resized_inputs):
+    """Faster R-CNN with PNAS preprocessing.
+
+    Maps pixel values to the range [-1, 1].
+
+    Args:
+      resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
+        representing a batch of images with values between 0 and 255.0.
+
+    Returns:
+      preprocessed_inputs: A [batch, height_out, width_out, channels] float32
+        tensor representing a batch of images.
+
+    """
+    return (2.0 / 255.0) * resized_inputs - 1.0
+
+  def _extract_proposal_features(self, preprocessed_inputs, scope):
+    """Extracts first stage RPN features.
+
+    Extracts features using the first half of the PNASNet network.
+    We construct the network in `align_feature_maps=True` mode, which means
+    that all VALID paddings in the network are changed to SAME padding so that
+    the feature maps are aligned.
+
+    Args:
+      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
+        representing a batch of images.
+      scope: A scope name.
+
+    Returns:
+      rpn_feature_map: A tensor with shape [batch, height, width, depth]
+      end_points: A dictionary mapping feature extractor tensor names to tensors
+
+    Raises:
+      ValueError: If the created network is missing the required activation.
+    """
+    del scope
+
+    if len(preprocessed_inputs.get_shape().as_list()) != 4:
+      raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
+                       'tensor of shape %s' % preprocessed_inputs.get_shape())
+
+    with slim.arg_scope(pnasnet_large_arg_scope_for_detection(
+        is_batch_norm_training=self._train_batch_norm)):
+      with arg_scope([slim.conv2d,
+                      slim.batch_norm,
+                      slim.separable_conv2d],
+                     reuse=self._reuse_weights):
+        _, end_points = pnasnet.build_pnasnet_large(
+            preprocessed_inputs, num_classes=None,
+            is_training=self._is_training,
+            final_endpoint='Cell_7')
+
+    # Note that both 'Cell_6' and 'Cell_7' have equal depth = 2160.
+    # Cell_7 is the last cell before second reduction.
+    rpn_feature_map = tf.concat([end_points['Cell_6'],
+                                 end_points['Cell_7']], 3)
+
+    # pnasnet.py does not maintain the batch size in the first dimension.
+    # This work around permits us retaining the batch for below.
+    batch = preprocessed_inputs.get_shape().as_list()[0]
+    shape_without_batch = rpn_feature_map.get_shape().as_list()[1:]
+    rpn_feature_map_shape = [batch] + shape_without_batch
+    rpn_feature_map.set_shape(rpn_feature_map_shape)
+
+    return rpn_feature_map, end_points
+
+  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
+    """Extracts second stage box classifier features.
+
+    This function reconstructs the "second half" of the PNASNet
+    network after the part defined in `_extract_proposal_features`.
+
+    Args:
+      proposal_feature_maps: A 4-D float tensor with shape
+        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
+        representing the feature map cropped to each proposal.
+      scope: A scope name.
+
+    Returns:
+      proposal_classifier_features: A 4-D float tensor with shape
+        [batch_size * self.max_num_proposals, height, width, depth]
+        representing box classifier features for each proposal.
+    """
+    del scope
+
+    # Number of used stem cells.
+    num_stem_cells = 2
+
+    # Note that we always feed into 2 layers of equal depth
+    # where the first N channels corresponds to previous hidden layer
+    # and the second N channels correspond to the final hidden layer.
+    hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)
+
+    # Note that what follows is largely a copy of build_pnasnet_large() within
+    # pnasnet.py. We are copying to minimize code pollution in slim.
+
+    # TODO(shlens,skornblith): Determine the appropriate drop path schedule.
+    # For now the schedule is the default (1.0->0.7 over 250,000 train steps).
+    hparams = pnasnet.large_imagenet_config()
+    if not self._is_training:
+      hparams.set_hparam('drop_path_keep_prob', 1.0)
+
+    # Calculate the total number of cells in the network
+    total_num_cells = hparams.num_cells + num_stem_cells
+
+    normal_cell = pnasnet.PNasNetNormalCell(
+        hparams.num_conv_filters, hparams.drop_path_keep_prob,
+        total_num_cells, hparams.total_training_steps)
+    with arg_scope([slim.dropout, nasnet_utils.drop_path],
+                   is_training=self._is_training):
+      with arg_scope([slim.batch_norm], is_training=self._train_batch_norm):
+        with arg_scope([slim.avg_pool2d,
+                        slim.max_pool2d,
+                        slim.conv2d,
+                        slim.batch_norm,
+                        slim.separable_conv2d,
+                        nasnet_utils.factorized_reduction,
+                        nasnet_utils.global_avg_pool,
+                        nasnet_utils.get_channel_index,
+                        nasnet_utils.get_channel_dim],
+                       data_format=hparams.data_format):
+
+          # This corresponds to the cell number just past 'Cell_7' used by
+          # _extract_proposal_features().
+          start_cell_num = 8
+          true_cell_num = start_cell_num + num_stem_cells
+
+          with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()):
+            net = _build_pnasnet_base(
+                hidden_previous,
+                hidden,
+                normal_cell=normal_cell,
+                hparams=hparams,
+                true_cell_num=true_cell_num,
+                start_cell_num=start_cell_num)
+
+    proposal_classifier_features = net
+    return proposal_classifier_features
+
+  def restore_from_classification_checkpoint_fn(
+      self,
+      first_stage_feature_extractor_scope,
+      second_stage_feature_extractor_scope):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Note that this overrides the default implementation in
+    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor which does not work for
+    PNASNet checkpoints.
+
+    Args:
+      first_stage_feature_extractor_scope: A scope name for the first stage
+        feature extractor.
+      second_stage_feature_extractor_scope: A scope name for the second stage
+        feature extractor.
+
+    Returns:
+      A dict mapping variable names (to load from a checkpoint) to variables in
+      the model graph.
+    """
+    variables_to_restore = {}
+    for variable in tf.global_variables():
+      if variable.op.name.startswith(
+          first_stage_feature_extractor_scope):
+        var_name = variable.op.name.replace(
+            first_stage_feature_extractor_scope + '/', '')
+        var_name += '/ExponentialMovingAverage'
+        variables_to_restore[var_name] = variable
+      if variable.op.name.startswith(
+          second_stage_feature_extractor_scope):
+        var_name = variable.op.name.replace(
+            second_stage_feature_extractor_scope + '/', '')
+        var_name += '/ExponentialMovingAverage'
+        variables_to_restore[var_name] = variable
+    return variables_to_restore
--- a/research/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py
+++ b/research/object_detection/models/faster_rcnn_pnas_feature_extractor_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for models.faster_rcnn_pnas_feature_extractor."""
+
+import tensorflow as tf
+
+from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
+
+
+class FasterRcnnPNASFeatureExtractorTest(tf.test.TestCase):
+
+  def _build_feature_extractor(self, first_stage_features_stride):
+    return frcnn_pnas.FasterRCNNPNASFeatureExtractor(
+        is_training=False,
+        first_stage_features_stride=first_stage_features_stride,
+        batch_norm_trainable=False,
+        reuse_weights=None,
+        weight_decay=0.0)
+
+  def test_extract_proposal_features_returns_expected_size(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    preprocessed_inputs = tf.random_uniform(
+        [1, 299, 299, 3], maxval=255, dtype=tf.float32)
+    rpn_feature_map, _ = feature_extractor.extract_proposal_features(
+        preprocessed_inputs, scope='TestScope')
+    features_shape = tf.shape(rpn_feature_map)
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      features_shape_out = sess.run(features_shape)
+      self.assertAllEqual(features_shape_out, [1, 19, 19, 4320])
+
+  def test_extract_proposal_features_input_size_224(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    preprocessed_inputs = tf.random_uniform(
+        [1, 224, 224, 3], maxval=255, dtype=tf.float32)
+    rpn_feature_map, _ = feature_extractor.extract_proposal_features(
+        preprocessed_inputs, scope='TestScope')
+    features_shape = tf.shape(rpn_feature_map)
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      features_shape_out = sess.run(features_shape)
+      self.assertAllEqual(features_shape_out, [1, 14, 14, 4320])
+
+  def test_extract_proposal_features_input_size_112(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    preprocessed_inputs = tf.random_uniform(
+        [1, 112, 112, 3], maxval=255, dtype=tf.float32)
+    rpn_feature_map, _ = feature_extractor.extract_proposal_features(
+        preprocessed_inputs, scope='TestScope')
+    features_shape = tf.shape(rpn_feature_map)
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      features_shape_out = sess.run(features_shape)
+      self.assertAllEqual(features_shape_out, [1, 7, 7, 4320])
+
+  def test_extract_proposal_features_dies_on_invalid_stride(self):
+    with self.assertRaises(ValueError):
+      self._build_feature_extractor(first_stage_features_stride=99)
+
+  def test_extract_proposal_features_dies_with_incorrect_rank_inputs(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    preprocessed_inputs = tf.random_uniform(
+        [224, 224, 3], maxval=255, dtype=tf.float32)
+    with self.assertRaises(ValueError):
+      feature_extractor.extract_proposal_features(
+          preprocessed_inputs, scope='TestScope')
+
+  def test_extract_box_classifier_features_returns_expected_size(self):
+    feature_extractor = self._build_feature_extractor(
+        first_stage_features_stride=16)
+    proposal_feature_maps = tf.random_uniform(
+        [2, 17, 17, 1088], maxval=255, dtype=tf.float32)
+    proposal_classifier_features = (
+        feature_extractor.extract_box_classifier_features(
+            proposal_feature_maps, scope='TestScope'))
+    features_shape = tf.shape(proposal_classifier_features)
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      features_shape_out = sess.run(features_shape)
+      self.assertAllEqual(features_shape_out, [2, 9, 9, 4320])
+
+  def test_filter_scaling_computation(self):
+    expected_filter_scaling = {
+        ((4, 8), 2): 1.0,
+        ((4, 8), 7): 2.0,
+        ((4, 8), 8): 2.0,
+        ((4, 8), 9): 4.0
+    }
+    for args, filter_scaling in expected_filter_scaling.items():
+      reduction_indices, start_cell_num = args
+      self.assertAlmostEqual(
+          frcnn_pnas._filter_scaling(reduction_indices, start_cell_num),
+          filter_scaling)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/ssd_inception_v2_feature_extractor.py
+++ b/research/object_detection/models/ssd_inception_v2_feature_extractor.py
@@ -37,7 +37,8 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """InceptionV2 Feature Extractor for SSD Models.

    Args:
@@ -55,11 +56,16 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDInceptionV2FeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding, use_depthwise)
+        use_explicit_padding, use_depthwise, inplace_batchnorm_update)

  def preprocess(self, resized_inputs):
    """SSD preprocessing.
@@ -76,7 +82,7 @@ class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

-  def extract_features(self, preprocessed_inputs):
+  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:

--- a/research/object_detection/models/ssd_inception_v3_feature_extractor.py
+++ b/research/object_detection/models/ssd_inception_v3_feature_extractor.py
@@ -37,7 +37,8 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """InceptionV3 Feature Extractor for SSD Models.

    Args:
@@ -55,11 +56,16 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDInceptionV3FeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding, use_depthwise)
+        use_explicit_padding, use_depthwise, inplace_batchnorm_update)

  def preprocess(self, resized_inputs):
    """SSD preprocessing.
@@ -76,7 +82,7 @@ class SSDInceptionV3FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

-  def extract_features(self, preprocessed_inputs):
+  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:

--- a/research/object_detection/models/ssd_mobilenet_v1_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_feature_extractor.py
@@ -38,7 +38,8 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """MobileNetV1 Feature Extractor for SSD Models.

    Args:
@@ -57,11 +58,16 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDMobileNetV1FeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding, use_depthwise)
+        use_explicit_padding, use_depthwise, inplace_batchnorm_update)

  def preprocess(self, resized_inputs):
    """SSD preprocessing.
@@ -78,7 +84,7 @@ class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

-  def extract_features(self, preprocessed_inputs):
+  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:

--- a/research/object_detection/models/ssd_mobilenet_v2_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v2_feature_extractor.py
@@ -39,7 +39,8 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """MobileNetV2 Feature Extractor for SSD Models.

    Mobilenet v2 (experimental), designed by sandler@. More details can be found
@@ -60,11 +61,16 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDMobileNetV2FeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding, use_depthwise)
+        use_explicit_padding, use_depthwise, inplace_batchnorm_update)

  def preprocess(self, resized_inputs):
    """SSD preprocessing.
@@ -81,7 +87,7 @@ class SSDMobileNetV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

-  def extract_features(self, preprocessed_inputs):
+  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:

--- a/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
+++ b/research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
@@ -43,7 +43,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """SSD FPN feature extractor based on Resnet v1 architecture.

    Args:
@@ -66,6 +67,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.

    Raises:
      ValueError: On supplying invalid arguments for unused arguments.
@@ -73,7 +79,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    super(_SSDResnetV1FpnFeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, batch_norm_trainable, reuse_weights,
-        use_explicit_padding)
+        use_explicit_padding, inplace_batchnorm_update)
    if self._depth_multiplier != 1.0:
      raise ValueError('Only depth 1.0 is supported, found: {}'.
                       format(self._depth_multiplier))
@@ -110,7 +116,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
        filtered_image_features[feature_name] = feature
    return filtered_image_features

-  def extract_features(self, preprocessed_inputs):
+  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
@@ -176,7 +182,8 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """Resnet50 v1 FPN Feature Extractor for SSD Models.

    Args:
@@ -194,11 +201,17 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDResnet50V1FpnFeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, resnet_v1.resnet_v1_50, 'resnet_v1_50', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding)
+        batch_norm_trainable, reuse_weights, use_explicit_padding,
+        inplace_batchnorm_update)


 class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
@@ -212,7 +225,8 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """Resnet101 v1 FPN Feature Extractor for SSD Models.

    Args:
@@ -230,11 +244,17 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDResnet101V1FpnFeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, resnet_v1.resnet_v1_101, 'resnet_v1_101', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding)
+        batch_norm_trainable, reuse_weights, use_explicit_padding,
+        inplace_batchnorm_update)


 class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
@@ -248,7 +268,8 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
               batch_norm_trainable=True,
               reuse_weights=None,
               use_explicit_padding=False,
-               use_depthwise=False):
+               use_depthwise=False,
+               inplace_batchnorm_update=False):
    """Resnet152 v1 FPN Feature Extractor for SSD Models.

    Args:
@@ -266,8 +287,14 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False. UNUSED currently.
      use_depthwise: Whether to use depthwise convolutions. UNUSED currently.
+      inplace_batchnorm_update: Whether to update batch_norm inplace during
+        training. This is required for batch norm to work correctly on TPUs.
+        When this is false, user must add a control dependency on
+        tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+        norm moving average parameters.
    """
    super(SSDResnet152V1FpnFeatureExtractor, self).__init__(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        conv_hyperparams, resnet_v1.resnet_v1_152, 'resnet_v1_152', 'fpn',
-        batch_norm_trainable, reuse_weights, use_explicit_padding)
+        batch_norm_trainable, reuse_weights, use_explicit_padding,
+        inplace_batchnorm_update)
--- a/research/object_detection/protos/eval.proto
+++ b/research/object_detection/protos/eval.proto
@@ -68,4 +68,8 @@ message EvalConfig {
  // Whether to keep image identifier in filename when exported to
  // visualization_export_dir.
  optional bool keep_image_id_for_visualization_export = 19 [default=false];
+
+  // Whether to retain original images (i.e. not pre-processed) in the tensor
+  // dictionary, so that they can be displayed in Tensorboard.
+  optional bool retain_original_images = 23 [default=true];
 }
--- a/research/object_detection/protos/faster_rcnn.proto
+++ b/research/object_detection/protos/faster_rcnn.proto
@@ -131,6 +131,12 @@ message FasterRcnn {
  // to use sigmoid loss and enable merge_multiple_label_boxes.
  // If not specified, Softmax loss is used as default.
  optional ClassificationLoss second_stage_classification_loss = 29;
+
+  // Whether to update batch_norm inplace during training. This is required
+  // for batch norm to work correctly on TPUs. When this is false, user must add
+  // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
+  // to update the batch norm moving average parameters.
+  optional bool inplace_batchnorm_update = 30 [default = false];
 }



--- a/research/object_detection/protos/input_reader.proto
+++ b/research/object_detection/protos/input_reader.proto
@@ -51,6 +51,9 @@ message InputReader {
  // Number of reader instances to create.
  optional uint32 num_readers = 6 [default=32];

+  // Number of records to read from each reader at once.
+  optional uint32 read_block_length = 15 [default=32];
+
  // Number of decoded records to prefetch before batching.
  optional uint32 prefetch_size = 13 [default = 512];


--- a/research/object_detection/protos/ssd.proto
+++ b/research/object_detection/protos/ssd.proto
@@ -59,6 +59,12 @@ message Ssd {

  // Loss configuration for training.
  optional Loss loss = 11;
+
+  // Whether to update batch_norm inplace during training. This is required
+  // for batch norm to work correctly on TPUs. When this is false, user must add
+  // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
+  // to update the batch norm moving average parameters.
+  optional bool inplace_batchnorm_update = 15 [default = false];
 }



--- a/research/object_detection/protos/train.proto
+++ b/research/object_detection/protos/train.proto
@@ -94,4 +94,9 @@ message TrainConfig {
  // Whether to remove padding along `num_boxes` dimension of the groundtruth
  // tensors.
  optional bool unpad_groundtruth_tensors = 21 [default=true];
+
+  // Whether to retain original images (i.e. not pre-processed) in the tensor
+  // dictionary, so that they can be displayed in Tensorboard. Note that this
+  // will lead to a larger memory footprint.
+  optional bool retain_original_images = 23 [default=false];
 }
--- a/research/object_detection/trainer.py
+++ b/research/object_detection/trainer.py
@@ -264,29 +264,6 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
          total_num_replicas=worker_replicas)
      sync_optimizer = training_optimizer

-    # Create ops required to initialize the model from a given checkpoint.
-    init_fn = None
-    if train_config.fine_tune_checkpoint:
-      if not train_config.fine_tune_checkpoint_type:
-        # train_config.from_detection_checkpoint field is deprecated. For
-        # backward compatibility, fine_tune_checkpoint_type is set based on
-        # from_detection_checkpoint.
-        if train_config.from_detection_checkpoint:
-          train_config.fine_tune_checkpoint_type = 'detection'
-        else:
-          train_config.fine_tune_checkpoint_type = 'classification'
-      var_map = detection_model.restore_map(
-          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
-          load_all_detection_checkpoint_vars=(
-              train_config.load_all_detection_checkpoint_vars))
-      available_var_map = (variables_helper.
-                           get_variables_available_in_checkpoint(
-                               var_map, train_config.fine_tune_checkpoint))
-      init_saver = tf.train.Saver(available_var_map)
-      def initializer_fn(sess):
-        init_saver.restore(sess, train_config.fine_tune_checkpoint)
-      init_fn = initializer_fn
-
    with tf.device(deploy_config.optimizer_device()):
      regularization_losses = (None if train_config.add_regularization_loss
                               else [])
@@ -354,6 +331,29 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
    saver = tf.train.Saver(
        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)

+    # Create ops required to initialize the model from a given checkpoint.
+    init_fn = None
+    if train_config.fine_tune_checkpoint:
+      if not train_config.fine_tune_checkpoint_type:
+        # train_config.from_detection_checkpoint field is deprecated. For
+        # backward compatibility, fine_tune_checkpoint_type is set based on
+        # from_detection_checkpoint.
+        if train_config.from_detection_checkpoint:
+          train_config.fine_tune_checkpoint_type = 'detection'
+        else:
+          train_config.fine_tune_checkpoint_type = 'classification'
+      var_map = detection_model.restore_map(
+          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
+          load_all_detection_checkpoint_vars=(
+              train_config.load_all_detection_checkpoint_vars))
+      available_var_map = (variables_helper.
+                           get_variables_available_in_checkpoint(
+                               var_map, train_config.fine_tune_checkpoint))
+      init_saver = tf.train.Saver(available_var_map)
+      def initializer_fn(sess):
+        init_saver.restore(sess, train_config.fine_tune_checkpoint)
+      init_fn = initializer_fn
+
    slim.learning.train(
        train_tensor,
        logdir=train_dir,

--- a/research/object_detection/utils/config_util.py
+++ b/research/object_detection/utils/config_util.py
@@ -14,10 +14,13 @@
 # ==============================================================================
 """Functions for reading and updating configuration files."""

+import os
 import tensorflow as tf

 from google.protobuf import text_format

+from tensorflow.python.lib.io import file_io
+
 from object_detection.protos import eval_pb2
 from object_detection.protos import input_reader_pb2
 from object_detection.protos import model_pb2
@@ -119,6 +122,24 @@ def create_pipeline_proto_from_configs(configs):
  return pipeline_config


+def save_pipeline_config(pipeline_config, directory):
+  """Saves a pipeline config text file to disk.
+
+  Args:
+    pipeline_config: A pipeline_pb2.TrainEvalPipelineConfig.
+    directory: The model directory into which the pipeline config file will be
+      saved.
+  """
+  if not file_io.file_exists(directory):
+    file_io.recursive_create_dir(directory)
+  pipeline_config_path = os.path.join(directory, "pipeline.config")
+  config_text = text_format.MessageToString(pipeline_config)
+  with tf.gfile.Open(pipeline_config_path, "wb") as f:
+    tf.logging.info("Writing pipeline config file to %s",
+                    pipeline_config_path)
+    f.write(config_text)
+
+
 def get_configs_from_multiple_files(model_config_path="",
                                    train_config_path="",
                                    train_input_config_path="",

--- a/research/object_detection/utils/config_util_test.py
+++ b/research/object_detection/utils/config_util_test.py
@@ -110,6 +110,23 @@ class ConfigUtilTest(tf.test.TestCase):
        config_util.create_pipeline_proto_from_configs(configs))
    self.assertEqual(pipeline_config, pipeline_config_reconstructed)

+  def test_save_pipeline_config(self):
+    """Tests that the pipeline config is properly saved to disk."""
+    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+    pipeline_config.model.faster_rcnn.num_classes = 10
+    pipeline_config.train_config.batch_size = 32
+    pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
+    pipeline_config.eval_config.num_examples = 20
+    pipeline_config.eval_input_reader.queue_capacity = 100
+
+    config_util.save_pipeline_config(pipeline_config, self.get_temp_dir())
+    configs = config_util.get_configs_from_pipeline_file(
+        os.path.join(self.get_temp_dir(), "pipeline.config"))
+    pipeline_config_reconstructed = (
+        config_util.create_pipeline_proto_from_configs(configs))
+
+    self.assertEqual(pipeline_config, pipeline_config_reconstructed)
+
  def test_get_configs_from_multiple_files(self):
    """Tests that proto configs can be read from multiple files."""
    temp_dir = self.get_temp_dir()

--- a/research/object_detection/utils/context_manager.py
+++ b/research/object_detection/utils/context_manager.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python context management helper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+class IdentityContextManager(object):
+  """Returns an identity context manager that does nothing.
+
+  This is helpful in setting up conditional `with` statement as below:
+
+  with slim.arg_scope(x) if use_slim_scope else IdentityContextManager():
+    do_stuff()
+
+  """
+
+  def __enter__(self):
+    return None
+
+  def __exit__(self, exec_type, exec_value, traceback):
+    del exec_type
+    del exec_value
+    del traceback
+    return False
+
--- a/research/object_detection/utils/context_manager_test.py
+++ b/research/object_detection/utils/context_manager_test.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow_models.object_detection.utils.context_manager."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from object_detection.utils import context_manager
+
+
+class ContextManagerTest(tf.test.TestCase):
+
+  def test_identity_context_manager(self):
+    with context_manager.IdentityContextManager() as identity_context:
+      self.assertIsNone(identity_context)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/utils/dataset_util.py
+++ b/research/object_detection/utils/dataset_util.py
@@ -123,11 +123,16 @@ def read_dataset(file_read_func, decode_func, input_files, config):
  if config.shuffle:
    filename_dataset = filename_dataset.shuffle(
        config.filenames_shuffle_buffer_size)
+  elif config.num_readers > 1:
+    tf.logging.warning('`shuffle` is false, but the input data stream is '
+                       'still slightly shuffled since `num_readers` > 1.')
+
  filename_dataset = filename_dataset.repeat(config.num_epochs or None)

  records_dataset = filename_dataset.apply(
      tf.contrib.data.parallel_interleave(
-          file_read_func, cycle_length=config.num_readers, sloppy=True))
+          file_read_func, cycle_length=config.num_readers,
+          block_length=config.read_block_length, sloppy=True))
  if config.shuffle:
    records_dataset.shuffle(config.shuffle_buffer_size)
  tensor_dataset = records_dataset.map(

--- a/research/object_detection/utils/ops.py
+++ b/research/object_detection/utils/ops.py
@@ -180,26 +180,24 @@ def pad_to_multiple(tensor, multiple):
    padded_tensor_width = int(
        math.ceil(float(tensor_width) / multiple) * multiple)

-  if (padded_tensor_height == tensor_height and
-      padded_tensor_width == tensor_width):
-    return tensor
-
  if tensor_depth is None:
    tensor_depth = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape
-  height_pad = tf.zeros([
-      batch_size, padded_tensor_height - tensor_height, tensor_width,
-      tensor_depth
-  ])
-  padded_tensor = tf.concat([tensor, height_pad], 1)
-  width_pad = tf.zeros([
-      batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
-      tensor_depth
-  ])
-  padded_tensor = tf.concat([padded_tensor, width_pad], 2)
-
-  return padded_tensor
+  if padded_tensor_height != tensor_height:
+    height_pad = tf.zeros([
+        batch_size, padded_tensor_height - tensor_height, tensor_width,
+        tensor_depth
+    ])
+    tensor = tf.concat([tensor, height_pad], 1)
+  if padded_tensor_width != tensor_width:
+    width_pad = tf.zeros([
+        batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
+        tensor_depth
+    ])
+    tensor = tf.concat([tensor, width_pad], 2)
+
+  return tensor


 def padded_one_hot_encoding(indices, depth, left_pad):