Commit b3247557 authored by Dheera Venkatraman's avatar Dheera Venkatraman

add flag for saving images to summary; strings moved to common.py

parents 75c931fd 2041d5ca
# Copyright 2018 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import tensorflow as tf
import sonnet as snt
import itertools
import functools
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import device as pydev
from tensorflow.python.framework import errors
from tensorflow.python.ops import variable_scope as variable_scope_ops
from sonnet.python.modules import util as snt_util
from tensorflow.python.util import nest
def eqzip(*args):
"""Zip but raises error if lengths don't match.
Args:
*args: list of lists or tuples
Returns:
list: the result of zip
Raises:
ValueError: when the lengths don't match
"""
sizes = [len(x) for x in args]
if not all(sizes[0] == x for x in sizes):
raise ValueError("Lists are of different sizes.\n%s" % str(sizes))
return zip(*args)
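# Illustrative usage sketch (hypothetical values, not part of the original
# module):
#   eqzip([1, 2], ["a", "b"])  # -> [(1, "a"), (2, "b")]
#   eqzip([1, 2], ["a"])       # raises ValueError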
@contextlib.contextmanager
def assert_no_new_variables():
"""Ensure that no tf.Variables are constructed inside the context.
Yields:
None
Raises:
ValueError: if a variable is created.
"""
old_variables = tf.global_variables()
num_vars = len(old_variables)
yield
if len(tf.global_variables()) != num_vars:
new_vars = set(tf.global_variables()) - set(old_variables)
tf.logging.error("NEW VARIABLES CREATED")
tf.logging.error(10*"=")
for v in new_vars:
tf.logging.error(v)
raise ValueError("Variables created inside an "
"assert_no_new_variables context")
if old_variables != tf.global_variables():
raise ValueError("Variables somehow changed inside an "
"assert_no_new_variables context. "
"This means something modified tf.global_variables().")
def get_variables_in_modules(module_list):
var_list = []
for m in module_list:
var_list.extend(snt.get_variables_in_module(m))
return var_list
def state_barrier_context(state):
"""Return a context manager that prevents interior ops from running
unless the whole state has been computed.
This is to prevent assign race conditions.
"""
tensors = [x for x in nest.flatten(state) if isinstance(x, tf.Tensor)]
tarray = [x.flow for x in nest.flatten(state) if hasattr(x, "flow")]
return tf.control_dependencies(tensors + tarray)
def _identity_fn(tf_entity):
if hasattr(tf_entity, "identity"):
return tf_entity.identity()
else:
return tf.identity(tf_entity)
def state_barrier_result(state):
"""Return the same state, but with a control dependency to prevent it from
being partially computed
"""
with state_barrier_context(state):
return nest.map_structure(_identity_fn, state)
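# Illustrative usage sketch: `next_state` is a hypothetical nested structure
# of tensors produced by one update step. Wrapping the result ensures that
# downstream assign ops only ever see a fully computed state:
#   next_state = state_barrier_result(next_state)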
def train_iterator(num_iterations):
"""Iterator that returns an index of the current step.
This iterator runs forever if num_iterations is None
otherwise it runs for some fixed amount of steps.
"""
if num_iterations is None:
return itertools.count()
else:
return xrange(num_iterations)
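# Illustrative usage sketch (hypothetical `session` and `train_op`):
#   for step in train_iterator(100):  # 100 steps; pass None to run forever
#     session.run(train_op)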
def print_op(op, msg):
"""Print a string and return an op wrapped in a control dependency to make
sure it ran."""
print_op = tf.Print(tf.constant(0), [tf.constant(0)], msg)
return tf.group(op, print_op)
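# Illustrative usage sketch (hypothetical `train_op`):
#   train_op = print_op(train_op, "running train step")
#   session.run(train_op)  # prints the message each time the op runs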
class MultiQueueRunner(tf.train.QueueRunner):
"""A QueueRunner with multiple queues """
def __init__(self, queues, enqueue_ops):
close_op = tf.group(*[q.close() for q in queues])
cancel_op = tf.group(
*[q.close(cancel_pending_enqueues=True) for q in queues])
queue_closed_exception_types = (errors.OutOfRangeError,)
enqueue_op = tf.group(*enqueue_ops, name="multi_enqueue")
super(MultiQueueRunner, self).__init__(
queues[0],
enqueue_ops=[enqueue_op],
close_op=close_op,
cancel_op=cancel_op,
queue_closed_exception_types=queue_closed_exception_types)
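# Illustrative usage sketch, assuming `queues` and matching per-queue
# `enqueue_ops` were built elsewhere in the graph:
#   qr = MultiQueueRunner(queues=queues, enqueue_ops=enqueue_ops)
#   tf.train.add_queue_runner(qr)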
# This function is not elegant, but of the many other approaches tried, this
# is the only one that did not incur significant overhead or hit obscure
# tensorflow bugs.
def sample_n_per_class(dataset, samples_per_class):
"""Create a new callable / dataset object that returns batches of each with
samples_per_class per label.
Args:
dataset: fn
samples_per_class: int
Returns:
function, [] -> batch where batch is the same type as the return of
dataset().
"""
with tf.control_dependencies(None), tf.name_scope(None):
with tf.name_scope("queue_runner/sample_n_per_class"):
batch = dataset()
num_classes = batch.label_onehot.shape.as_list()[1]
batch_size = num_classes * samples_per_class
flatten = nest.flatten(batch)
queues = []
enqueue_ops = []
capacity = samples_per_class * 20
for i in xrange(num_classes):
queue = tf.FIFOQueue(
capacity=capacity,
shapes=[f.shape.as_list()[1:] for f in flatten],
dtypes=[f.dtype for f in flatten])
queues.append(queue)
idx = tf.where(tf.equal(batch.label, i))
sub_batch = []
to_enqueue = []
for elem in batch:
new_e = tf.gather(elem, idx)
new_e = tf.squeeze(new_e, 1)
to_enqueue.append(new_e)
remaining = (capacity - queue.size())
to_add = tf.minimum(tf.shape(idx)[0], remaining)
def _enqueue():
return queue.enqueue_many([t[:to_add] for t in to_enqueue])
enqueue_op = tf.cond(
tf.equal(to_add, 0), tf.no_op, _enqueue)
enqueue_ops.append(enqueue_op)
# This has caused many deadlocks / issues; this logging at least sheds some
# light on what is going on.
print_lam = lambda: tf.Print(
tf.constant(0.0), [q.size() for q in queues],
"MultiQueueRunner queues status. Has capacity %d" % capacity)
some_percent_of_time = tf.less(tf.random_uniform([]), 0.0005)
maybe_print = tf.cond(some_percent_of_time, print_lam,
lambda: tf.constant(0.0))
with tf.control_dependencies([maybe_print]):
enqueue_ops = [tf.group(e) for e in enqueue_ops]
qr = MultiQueueRunner(queues=queues, enqueue_ops=enqueue_ops)
tf.train.add_queue_runner(qr)
def dequeue_batch():
with tf.name_scope("sample_n_per_batch/dequeue/"):
entries = []
for q in queues:
entries.append(q.dequeue_many(samples_per_class))
flat_batch = [tf.concat(x, 0) for x in zip(*entries)]
idx = tf.random_shuffle(tf.range(batch_size))
flat_batch = [tf.gather(f, idx, axis=0) for f in flat_batch]
return nest.pack_sequence_as(batch, flat_batch)
return dequeue_batch
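# Illustrative usage sketch, assuming a hypothetical `mnist_dataset` callable
# whose batches carry `label` and `label_onehot` fields:
#   get_batch = sample_n_per_class(mnist_dataset, samples_per_class=5)
#   batch = get_batch()  # every dequeued batch has 5 examples of each class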
def structure_map_multi(func, values):
all_values = [nest.flatten(v) for v in values]
rets = []
for pair in zip(*all_values):
rets.append(func(pair))
return nest.pack_sequence_as(values[0], rets)
def structure_map_split(func, value):
vv = nest.flatten(value)
rets = []
for v in vv:
rets.append(func(v))
return [nest.pack_sequence_as(value, r) for r in zip(*rets)]
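# Illustrative sketches with plain tuples (any nest-compatible structure
# works the same way):
#   structure_map_multi(sum, [(1, 2), (10, 20)])    # -> (11, 22)
#   structure_map_split(lambda v: (v, -v), (1, 2))  # -> [(1, 2), (-1, -2)]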
def assign_variables(targets, values):
return tf.group(*[t.assign(v) for t,v in eqzip(targets, values)],
name="assign_variables")
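# Illustrative usage sketch: sync one list of variables from another, e.g.
# copying hypothetical `online_vars` into `target_vars` of matching shapes:
#   sync_op = assign_variables(target_vars, online_vars)
#   session.run(sync_op)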
def create_variables_in_class_scope(method):
"""Force the variables constructed in this class to live in the sonnet module.
Wraps a method on a sonnet module.
For example the following will create two different variables.
```
class Mod(snt.AbstractModule):
@create_variables_in_class_scope
def dynamic_thing(self, input, name):
return snt.Linear(name)(input)
mod.dynamic_thing(x, name="module_nameA")
mod.dynamic_thing(x, name="module_nameB")
# reuse
mod.dynamic_thing(y, name="module_nameA")
```
"""
@functools.wraps(method)
def wrapper(obj, *args, **kwargs):
def default_context_manager(reuse=None):
variable_scope = obj.variable_scope
return tf.variable_scope(variable_scope, reuse=reuse)
variable_scope_context_manager = getattr(obj, "_enter_variable_scope",
default_context_manager)
graph = tf.get_default_graph()
# Temporarily enter the variable scope to capture it
with variable_scope_context_manager() as tmp_variable_scope:
variable_scope = tmp_variable_scope
with variable_scope_ops._pure_variable_scope(
variable_scope, reuse=tf.AUTO_REUSE) as pure_variable_scope:
name_scope = variable_scope.original_name_scope
if name_scope[-1] != "/":
name_scope += "/"
with tf.name_scope(name_scope):
sub_scope = snt_util.to_snake_case(method.__name__)
with tf.name_scope(sub_scope) as scope:
out_ops = method(obj, *args, **kwargs)
return out_ops
return wrapper
# Copyright 2018 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
import tensorflow as tf
from contextlib import contextmanager
from tensorflow.python.ops import variable_scope
# Sanity-check global state used to ensure this is not applied recursively.
_is_variable_replacing = [False]
def in_variable_replace_scope():
return _is_variable_replacing[0]
@contextmanager
def variable_replace(replacements, no_new=True):
""" A context manager that replaces variables.
This is a context manager that replaces all calls to
get_variable with the variable in replacements.
This function does not support recursive application.
Args:
replacements: dict
dictionary mapping a variable to replace (the key), with
the variable one wants to replace this variable with (the value).
no_new: bool
raise an error if variables were created.
This is for sanity checking.
Raises:
ValueError: if a new variable is created, or if not all replacements are used.
"""
# TODO(lmetz) This function is a bit scary, as it relies on monkey patching
# the call to get_variable. Ideally this can be done with variable_scope's
# custom_getter attribute, but when initially writing this that was not
# available.
replacements = {k: v for k, v in replacements.items() if not k == v}
init_vars = tf.trainable_variables()
old_get_variable = variable_scope.get_variable
old_tf_get_variable = tf.get_variable
names_replace = {}
has_replaced_names = []
tf.logging.vlog(2, "Trying to replace")
for k, v in replacements.items():
tf.logging.vlog(2, k.name + " >> " + v.name)
tf.logging.vlog(2, "===")
for k, v in replacements.items():
strip_name = k.name.replace("/read:0", "")
strip_name = strip_name.replace(":0", "")
names_replace[strip_name] = v
# TODO(lmetz) is there a cleaner way to do this?
def new_get_variable(name, *args, **kwargs):
# Resolve the full variable name and return the replacement if one exists.
n = tf.get_variable_scope().name + "/" + name
if n in names_replace:
has_replaced_names.append(n)
return names_replace[n]
else:
return old_get_variable(name, *args, **kwargs)
# perform the monkey patch
if _is_variable_replacing[0]:
raise ValueError("No recursive calling to variable replace allowed.")
variable_scope.get_variable = new_get_variable
tf.get_variable = new_get_variable
_is_variable_replacing[0] = True
yield
if set(has_replaced_names) != set(names_replace.keys()):
tf.logging.error("Did not use all replacements.")
tf.logging.error("Replaced variables that were not requested:")
tf.logging.error("===")
for n in list(set(has_replaced_names) - set(names_replace.keys())):
tf.logging.error(n)
tf.logging.error("Missed replacing variables:")
tf.logging.error("===")
for n in list(set(names_replace.keys()) - set(has_replaced_names)):
tf.logging.error(n + " ==> " + names_replace[n].name)
raise ValueError("Fix this -- see the error log above.")
# undo the monkey patch
tf.get_variable = old_tf_get_variable
variable_scope.get_variable = old_get_variable
_is_variable_replacing[0] = False
final_vars = tf.trainable_variables()
assert set(init_vars) == set(final_vars), "trainable variables changed"
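# Illustrative usage sketch: rebuild a graph against perturbed parameter
# values. `model_fn` is a hypothetical function that creates its parameters
# via tf.get_variable under reused variable scopes.
#   theta = tf.trainable_variables()
#   perturbed = {v: v + 0.01 for v in theta}  # tensors standing in for vars
#   with variable_replace(perturbed):
#     loss_at_perturbed = model_fn(inputs)  # get_variable yields replacements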
@@ -29,6 +29,7 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
@@ -89,6 +90,16 @@ reporting an issue.
## Release information
### April 2, 2018
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
### February 9, 2018
We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
@@ -30,6 +30,7 @@ from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
@@ -55,6 +56,8 @@ SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
'faster_rcnn_nas':
frcnn_nas.FasterRCNNNASFeatureExtractor,
'faster_rcnn_pnas':
frcnn_pnas.FasterRCNNPNASFeatureExtractor,
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
'faster_rcnn_inception_v2':
@@ -95,13 +98,19 @@ def build(model_config, is_training, add_summaries=True):
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None):
reuse_weights=None,
inplace_batchnorm_update=False):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
@@ -126,7 +135,8 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
return feature_extractor_class(is_training, depth_multiplier, min_depth,
pad_to_multiple, conv_hyperparams,
batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise,
inplace_batchnorm_update)
def _build_ssd_model(ssd_config, is_training, add_summaries):
@@ -140,6 +150,7 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
Returns:
SSDMetaArch based on the config.
Raises:
ValueError: If ssd_config.type is not recognized (i.e. not registered in
model_class_map).
@@ -147,8 +158,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
num_classes = ssd_config.num_classes
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
is_training)
feature_extractor = _build_ssd_feature_extractor(
feature_extractor_config=ssd_config.feature_extractor,
is_training=is_training,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
@@ -194,7 +207,8 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
def _build_faster_rcnn_feature_extractor(
feature_extractor_config, is_training, reuse_weights=None):
feature_extractor_config, is_training, reuse_weights=None,
inplace_batchnorm_update=False):
"""Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Args:
@@ -202,6 +216,11 @@ def _build_faster_rcnn_feature_extractor(
faster_rcnn.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Returns:
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
@@ -209,6 +228,8 @@
Raises:
ValueError: On invalid feature extractor type.
"""
if inplace_batchnorm_update:
raise ValueError('inplace batchnorm updates not supported.')
feature_type = feature_extractor_config.type
first_stage_features_stride = (
feature_extractor_config.first_stage_features_stride)
@@ -238,6 +259,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
Returns:
FasterRCNNMetaArch based on the config.
Raises:
ValueError: If frcnn_config.type is not recognized (i.e. not registered in
model_class_map).
@@ -246,7 +268,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
feature_extractor = _build_faster_rcnn_feature_extractor(
frcnn_config.feature_extractor, is_training)
frcnn_config.feature_extractor, is_training,
frcnn_config.inplace_batchnorm_update)
number_of_stages = frcnn_config.number_of_stages
first_stage_anchor_generator = anchor_generator_builder.build(
@@ -25,6 +25,7 @@ from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
@@ -297,6 +298,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1'
conv_hyperparams {
@@ -519,6 +521,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_faster_rcnn_resnet_v1_models_from_config(self):
model_text_proto = """
faster_rcnn {
inplace_batchnorm_update: true
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
@@ -726,6 +729,73 @@ class ModelBuilderTest(tf.test.TestCase):
model._feature_extractor,
frcnn_nas.FasterRCNNNASFeatureExtractor)
def test_create_faster_rcnn_pnas_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_pnas'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(
model._feature_extractor,
frcnn_pnas.FasterRCNNPNASFeatureExtractor)
def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
@@ -17,6 +17,7 @@
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
@@ -509,9 +510,13 @@ class BoxListOpsTest(tf.test.TestCase):
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'misc')
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
if ops._USE_C_API:
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'weights')
else:
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
def test_visualize_boxes_in_image(self):
image = tf.zeros((6, 4, 3))
@@ -2279,7 +2279,11 @@ def resize_image(image,
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
# The shape function will be computed for both branches of the
# condition, regardless of which branch is actually taken. Make sure
# that we don't trigger an assertion in the shape function when trying
# to reshape a non empty tensor into an empty one.
new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0, resize_masks_branch,
@@ -64,7 +64,7 @@ cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
# TRAIN_IMAGE_FILE="train2017.zip"
TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
@@ -91,7 +91,7 @@ download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
# # Build TFRecords of the image data.
# Build TFRecords of the image data.
cd "${CURRENT_DIR}"
python object_detection/dataset_tools/create_coco_tf_record.py \
--logtostderr \
@@ -79,7 +79,7 @@ def visualize_detection_results(result_dict,
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
[1, height, width, 3] or [1, height, width, 1]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
@@ -133,6 +135,8 @@
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict[input_fields.original_image], axis=0)
if image.shape[2] == 1:  # If a one-channel image, tile it into RGB.
image = np.tile(image, [1, 1, 3])
detection_boxes = result_dict[detection_fields.detection_boxes]
detection_scores = result_dict[detection_fields.detection_scores]
detection_classes = np.int32((result_dict[
@@ -94,14 +94,24 @@ def _extract_predictions_and_losses(model,
if fields.InputDataFields.groundtruth_group_of in input_dict:
groundtruth[fields.InputDataFields.groundtruth_group_of] = (
input_dict[fields.InputDataFields.groundtruth_group_of])
groundtruth_masks_list = None
if fields.DetectionResultFields.detection_masks in detections:
groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
input_dict[fields.InputDataFields.groundtruth_instance_masks])
groundtruth_masks_list = [
input_dict[fields.InputDataFields.groundtruth_instance_masks]]
groundtruth_keypoints_list = None
if fields.DetectionResultFields.detection_keypoints in detections:
groundtruth[fields.InputDataFields.groundtruth_keypoints] = (
input_dict[fields.InputDataFields.groundtruth_keypoints])
groundtruth_keypoints_list = [
input_dict[fields.InputDataFields.groundtruth_keypoints]]
label_id_offset = 1
model.provide_groundtruth(
[input_dict[fields.InputDataFields.groundtruth_boxes]],
[tf.one_hot(input_dict[fields.InputDataFields.groundtruth_classes]
- label_id_offset, depth=model.num_classes)])
- label_id_offset, depth=model.num_classes)],
groundtruth_masks_list, groundtruth_keypoints_list)
losses_dict.update(model.loss(prediction_dict, true_image_shapes))
result_dict = eval_util.result_dict_for_single_example(
@@ -205,7 +215,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
return {}, {}
global_step = tf.train.global_step(sess, tf.train.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
@@ -19,7 +19,9 @@ In the table below, we list each such pre-trained model including:
aware that these timings depend highly on one's specific hardware
configuration (these timings were performed using an Nvidia
GeForce GTX TITAN X card) and should be treated more as relative timings in
many cases.
many cases. Also note that desktop GPU timing does not always reflect mobile
run time. For example, Mobilenet V2 is faster on mobile devices than Mobilenet
V1, but is slightly slower on desktop GPU.
* detector performance on subset of the COCO validation set or Open Images test split as measured by the dataset-specific mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
@@ -68,6 +70,7 @@ Some remarks on frozen inference graphs:
| Model name | Speed (ms) | COCO mAP[^1] | Outputs |
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) | 30 | 21 | Boxes |
| [ssd_mobilenet_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) | 31 | 22 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz) | 42 | 24 | Boxes |
| [faster_rcnn_inception_v2_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz) | 58 | 28 | Boxes |
| [faster_rcnn_resnet50_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz) | 89 | 30 | Boxes |
@@ -37,7 +37,7 @@ environment variable below:
export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET}
```
It is also possible to run locally by following
It is also possible to run locally by following
[the running locally instructions](running_locally.md).
## Installing Tensorflow and the Tensorflow Object Detection API
@@ -58,7 +58,8 @@ def transform_input_data(tensor_dict,
Data transformation functions are applied in the following order.
1. data_augmentation_fn (optional): applied on tensor_dict.
2. model_preprocess_fn: applied only on image tensor in tensor_dict.
3. image_resizer_fn: applied only on instance mask tensor in tensor_dict.
3. image_resizer_fn: applied on original image and instance mask tensor in
tensor_dict.
4. one_hot_encoding: applied to classes tensor in tensor_dict.
5. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
same they can be merged into a single box with an associated k-hot class
@@ -70,10 +71,11 @@ def transform_input_data(tensor_dict,
model_preprocess_fn: model's preprocess function to apply on image tensor.
This function must take in a 4-D float tensor and return a 4-D preprocess
float tensor and a tensor containing the true image shape.
image_resizer_fn: image resizer function to apply on groundtruth instance
masks. This function must take a 4-D float tensor of image and a 4-D
tensor of instances masks and return resized version of these along with
the true shapes.
image_resizer_fn: image resizer function to apply on original image (if
`retain_original_image` is True) and groundtruth instance masks. This
function must take a 3-D float tensor of an image and a 3-D tensor of
instance masks and return a resized version of these along with the true
shapes.
num_classes: number of max classes to one-hot (or k-hot) encode the class
labels.
data_augmentation_fn: (optional) data augmentation function to apply on
@@ -88,17 +90,19 @@ def transform_input_data(tensor_dict,
after applying all the transformations.
"""
if retain_original_image:
tensor_dict[fields.InputDataFields.
original_image] = tensor_dict[fields.InputDataFields.image]
original_image_resized, _ = image_resizer_fn(
tensor_dict[fields.InputDataFields.image])
tensor_dict[fields.InputDataFields.original_image] = tf.cast(
original_image_resized, tf.uint8)
# Apply data augmentation ops.
if data_augmentation_fn is not None:
tensor_dict = data_augmentation_fn(tensor_dict)
# Apply model preprocessing ops and resize instance masks.
image = tf.expand_dims(
tf.to_float(tensor_dict[fields.InputDataFields.image]), axis=0)
preprocessed_resized_image, true_image_shape = model_preprocess_fn(image)
image = tensor_dict[fields.InputDataFields.image]
preprocessed_resized_image, true_image_shape = model_preprocess_fn(
tf.expand_dims(tf.to_float(image), axis=0))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
preprocessed_resized_image, axis=0)
tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
@@ -156,6 +160,52 @@ def augment_input_data(tensor_dict, data_augmentation_options):
return tensor_dict
def _get_labels_dict(input_dict):
"""Extracts labels dict from input dict."""
required_label_keys = [
fields.InputDataFields.num_groundtruth_boxes,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_weights
]
labels_dict = {}
for key in required_label_keys:
labels_dict[key] = input_dict[key]
optional_label_keys = [
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_difficult
]
for key in optional_label_keys:
if key in input_dict:
labels_dict[key] = input_dict[key]
if fields.InputDataFields.groundtruth_difficult in labels_dict:
labels_dict[fields.InputDataFields.groundtruth_difficult] = tf.cast(
labels_dict[fields.InputDataFields.groundtruth_difficult], tf.int32)
return labels_dict
def _get_features_dict(input_dict):
"""Extracts features dict from input dict."""
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
}
if fields.InputDataFields.original_image in input_dict:
features[fields.InputDataFields.original_image] = input_dict[
fields.InputDataFields.original_image]
return features
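# Illustrative sketch of how the two helpers split an iterator output
# (hypothetical `input_dict` with the keys listed above):
#   features = _get_features_dict(input_dict)  # image, hash, true_image_shape
#   labels = _get_labels_dict(input_dict)      # boxes, classes, weights, ...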
def create_train_input_fn(train_config, train_input_config,
model_config):
"""Creates a train `input` function for `Estimator`.
@@ -184,6 +234,8 @@ def create_train_input_fn(train_config, train_input_config,
features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
int32 tensor representing the true image shapes, as preprocessed
images could be padded.
features[fields.InputDataFields.original_image] (optional) is a
[batch_size, H, W, C] float32 tensor with original images.
labels: Dictionary of groundtruth tensors.
labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
int32 tensor indicating the number of groundtruth boxes.
@@ -233,7 +285,8 @@ def create_train_input_fn(train_config, train_input_config,
transform_input_data, model_preprocess_fn=model.preprocess,
image_resizer_fn=image_resizer_fn,
num_classes=config_util.get_number_of_classes(model_config),
data_augmentation_fn=data_augmentation_fn)
data_augmentation_fn=data_augmentation_fn,
retain_original_image=train_config.retain_original_images)
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
train_input_config,
transform_input_data_fn=transform_data_fn,
@@ -242,35 +295,8 @@ def create_train_input_fn(train_config, train_input_config,
num_classes=config_util.get_number_of_classes(model_config),
spatial_image_shape=config_util.get_spatial_image_size(
image_resizer_config))
tensor_dict = dataset_util.make_initializable_iterator(dataset).get_next()
hash_from_source_id = tf.string_to_hash_bucket_fast(
tensor_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image: tensor_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape: tensor_dict[
fields.InputDataFields.true_image_shape]
}
labels = {
fields.InputDataFields.num_groundtruth_boxes: tensor_dict[
fields.InputDataFields.num_groundtruth_boxes],
fields.InputDataFields.groundtruth_boxes: tensor_dict[
fields.InputDataFields.groundtruth_boxes],
fields.InputDataFields.groundtruth_classes: tensor_dict[
fields.InputDataFields.groundtruth_classes],
fields.InputDataFields.groundtruth_weights: tensor_dict[
fields.InputDataFields.groundtruth_weights]
}
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
labels[fields.InputDataFields.groundtruth_keypoints] = tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
labels[fields.InputDataFields.groundtruth_instance_masks] = tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
return features, labels
input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
return _train_input_fn
@@ -345,7 +371,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
image_resizer_fn=image_resizer_fn,
num_classes=num_classes,
data_augmentation_fn=None,
retain_original_image=True)
retain_original_image=eval_config.retain_original_images)
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
eval_input_config,
transform_input_data_fn=transform_data_fn,
@@ -355,36 +381,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
image_resizer_config))
input_dict = dataset_util.make_initializable_iterator(dataset).get_next()
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
fields.InputDataFields.original_image:
input_dict[fields.InputDataFields.original_image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
}
labels = {
fields.InputDataFields.groundtruth_boxes:
input_dict[fields.InputDataFields.groundtruth_boxes],
fields.InputDataFields.groundtruth_classes:
input_dict[fields.InputDataFields.groundtruth_classes],
fields.InputDataFields.groundtruth_area:
input_dict[fields.InputDataFields.groundtruth_area],
fields.InputDataFields.groundtruth_is_crowd:
input_dict[fields.InputDataFields.groundtruth_is_crowd],
fields.InputDataFields.groundtruth_difficult:
tf.cast(input_dict[fields.InputDataFields.groundtruth_difficult],
tf.int32)
}
if fields.InputDataFields.groundtruth_instance_masks in input_dict:
labels[fields.InputDataFields.groundtruth_instance_masks] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return features, labels
return (_get_features_dict(input_dict), _get_labels_dict(input_dict))
return _eval_input_fn
@@ -34,16 +34,12 @@ FLAGS = tf.flags.FLAGS
def _get_configs_for_model(model_name):
"""Returns configurations for model."""
fname = os.path.join(
FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/samples/configs/' + model_name + '.config'))
label_map_path = os.path.join(FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/data/pet_label_map.pbtxt'))
data_path = os.path.join(FLAGS.test_srcdir,
('google3/third_party/tensorflow_models/'
'object_detection/test_data/pets_examples.record'))
fname = os.path.join(tf.resource_loader.get_data_files_path(),
'samples/configs/' + model_name + '.config')
label_map_path = os.path.join(tf.resource_loader.get_data_files_path(),
'data/pet_label_map.pbtxt')
data_path = os.path.join(tf.resource_loader.get_data_files_path(),
'test_data/pets_examples.record')
configs = config_util.get_configs_from_pipeline_file(fname)
return config_util.merge_external_params_with_configs(
configs,
@@ -462,22 +458,31 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
}
def fake_image_resizer_fn(image, masks):
def fake_image_resizer_fn(image, masks=None):
resized_image = tf.image.resize_images(image, [8, 8])
resized_masks = tf.transpose(
tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
[2, 0, 1])
return resized_image, resized_masks, tf.shape(resized_image)
results = [resized_image]
if masks is not None:
resized_masks = tf.transpose(
tf.image.resize_images(tf.transpose(masks, [1, 2, 0]), [8, 8]),
[2, 0, 1])
results.append(resized_masks)
results.append(tf.shape(resized_image))
return results
num_classes = 3
input_transformation_fn = functools.partial(
inputs.transform_input_data,
model_preprocess_fn=_fake_model_preprocessor_fn,
image_resizer_fn=fake_image_resizer_fn,
num_classes=num_classes)
num_classes=num_classes,
retain_original_image=True)
with self.test_session() as sess:
transformed_inputs = sess.run(
input_transformation_fn(tensor_dict=tensor_dict))
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].dtype, tf.uint8)
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [8, 8, 3])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
@@ -46,7 +46,8 @@ class SSDFeatureExtractor(object):
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""Constructor.
Args:
@@ -64,6 +65,10 @@
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false, the train op must add a control
dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
the batch norm statistics.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
@@ -71,6 +76,7 @@
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams
self._batch_norm_trainable = batch_norm_trainable
self._inplace_batchnorm_update = inplace_batchnorm_update
self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise
@@ -108,7 +114,29 @@
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
pass
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
updates_collections=batchnorm_updates_collections):
return self._extract_features(preprocessed_inputs)
@abstractmethod
def _extract_features(self, preprocessed_inputs):
"""Extracts features from preprocessed inputs.
This function is responsible for extracting feature maps from preprocessed
images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
raise NotImplementedError
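# Illustrative sketch of the non-inplace path: when inplace_batchnorm_update
# is False, batch norm statistics are collected in tf.GraphKeys.UPDATE_OPS and
# the train op must depend on them (hypothetical `optimizer` / `total_loss`):
#   update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#   with tf.control_dependencies(update_ops):
#     train_op = optimizer.minimize(total_loss)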
class SSDMetaArch(model.DetectionModel):
@@ -49,8 +49,8 @@ tf.flags.DEFINE_string('model_dir', None, 'Path to output model directory '
'where event and checkpoint files will be written.')
tf.flags.DEFINE_string('pipeline_config_path', None, 'Path to pipeline config '
'file.')
tf.flags.DEFINE_integer('num_train_steps', 500000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', 10000, 'Number of train steps.')
tf.flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
tf.flags.DEFINE_integer('num_eval_steps', None, 'Number of eval steps.')
FLAGS = tf.flags.FLAGS
@@ -225,7 +225,14 @@
labels,
unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
elif mode == tf.estimator.ModeKeys.EVAL:
labels = unstack_batch(labels, unpad_groundtruth_tensors=False)
# When evaluating on training data, it is necessary to check whether the
# groundtruth must be unpadded.
boxes_shape = (
labels[fields.InputDataFields.groundtruth_boxes].get_shape()
.as_list())
unpad_groundtruth_tensors = boxes_shape[1] is not None
labels = unstack_batch(
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
@@ -241,7 +248,9 @@
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list)
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_weights_list=labels[
fields.InputDataFields.groundtruth_weights])
preprocessed_images = features[fields.InputDataFields.image]
prediction_dict = detection_model.predict(
@@ -250,14 +259,6 @@
prediction_dict, features[fields.InputDataFields.true_image_shape])
if mode == tf.estimator.ModeKeys.TRAIN:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
# backward compatibility, sets finetune_checkpoint_type based on
# from_detection_checkpoint.
if train_config.from_detection_checkpoint:
train_config.fine_tune_checkpoint_type = 'detection'
else:
train_config.fine_tune_checkpoint_type = 'classification'
if train_config.fine_tune_checkpoint and hparams.load_pretrained:
if not train_config.fine_tune_checkpoint_type:
# train_config.from_detection_checkpoint field is deprecated. For
@@ -341,17 +342,16 @@
}
eval_metric_ops = None
if mode == tf.estimator.ModeKeys.EVAL:
# Detection summaries during eval.
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
class_agnostic = (fields.DetectionResultFields.detection_classes
not in detections)
groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
use_original_images = fields.InputDataFields.original_image in features
eval_images = (
original_images = (
features[fields.InputDataFields.original_image] if use_original_images
else features[fields.InputDataFields.image])
eval_dict = eval_util.result_dict_for_single_example(
eval_images[0:1],
original_images[0:1],
features[inputs.HASH_KEY][0],
detections,
groundtruth,
@@ -363,21 +363,26 @@
else:
category_index = label_map_util.create_category_index_from_labelmap(
eval_input_config.label_map_path)
img_summary = None
if not use_tpu and use_original_images:
detection_and_groundtruth = (
vis_utils.draw_side_by_side_evaluation_image(
eval_dict, category_index, max_boxes_to_draw=20,
min_score_thresh=0.2))
tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
# Eval metrics on a single image.
eval_metrics = eval_config.metrics_set
if not eval_metrics:
eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False)
img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
detection_and_groundtruth)
if mode == tf.estimator.ModeKeys.EVAL:
# Eval metrics on a single example.
eval_metrics = eval_config.metrics_set
if not eval_metrics:
eval_metrics = ['coco_detection_metrics']
eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
eval_metrics, category_index.values(), eval_dict,
include_metrics_per_category=False)
if img_summary is not None:
eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
img_summary, tf.no_op())
if use_tpu:
return tf.contrib.tpu.TPUEstimatorSpec(
@@ -32,20 +32,19 @@ from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
FLAGS = tf.flags.FLAGS
MODEL_NAME_FOR_TEST = model_test_util.SSD_INCEPTION_MODEL_NAME
def _get_data_path():
"""Returns an absolute path to TFRecord file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'test_data',
return os.path.join(tf.resource_loader.get_data_files_path(), 'test_data',
'pets_examples.record')
def _get_labelmap_path():
"""Returns an absolute path to label map file."""
return os.path.join(FLAGS.test_srcdir, model_test_util.PATH_BASE, 'data',
return os.path.join(tf.resource_loader.get_data_files_path(), 'data',
'pet_label_map.pbtxt')
@@ -28,13 +28,12 @@ FLAGS = tf.flags.FLAGS
FASTER_RCNN_MODEL_NAME = 'faster_rcnn_resnet50_pets'
SSD_INCEPTION_MODEL_NAME = 'ssd_inception_v2_pets'
PATH_BASE = 'google3/third_party/tensorflow_models/object_detection/'
def GetPipelineConfigPath(model_name):
"""Returns path to the local pipeline config file."""
return os.path.join(FLAGS.test_srcdir, PATH_BASE, 'samples', 'configs',
model_name + '.config')
return os.path.join(tf.resource_loader.get_data_files_path(), 'samples',
'configs', model_name + '.config')
def InitializeFlags(model_name_for_test):
@@ -53,7 +53,8 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
batch_norm_trainable=True,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False):
use_depthwise=False,
inplace_batchnorm_update=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
@@ -71,6 +72,11 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, the user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the batch
norm moving average parameters.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
@@ -82,9 +88,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise)
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
def extract_features(self, preprocessed_inputs):
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
@@ -22,30 +22,6 @@ from nets import mobilenet_v1
slim = tf.contrib.slim
def _batch_norm_arg_scope(list_ops,
use_batch_norm=True,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
batch_norm_scale=False,
train_batch_norm=False):
"""Slim arg scope for Mobilenet V1 batch norm."""
if use_batch_norm:
batch_norm_params = {
'is_training': train_batch_norm,
'scale': batch_norm_scale,
'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon
}
normalizer_fn = slim.batch_norm
else:
normalizer_fn = None
batch_norm_params = None
return slim.arg_scope(list_ops,
normalizer_fn=normalizer_fn,
normalizer_params=batch_norm_params)
class FasterRCNNMobilenetV1FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN Mobilenet V1 feature extractor implementation."""
@@ -121,18 +97,19 @@ class FasterRCNNMobilenetV1FeatureExtractor(
['image size must at least be 33 in both height and width.'])
with tf.control_dependencies([shape_assert]):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=self._train_batch_norm,
weight_decay=self._weight_decay)):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, activations = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs,
final_endpoint='Conv2d_13_pointwise',
final_endpoint='Conv2d_11_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
return activations['Conv2d_13_pointwise'], activations
return activations['Conv2d_11_pointwise'], activations
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
"""Extracts second stage box classifier features.
@@ -152,9 +129,10 @@ class FasterRCNNMobilenetV1FeatureExtractor(
depth = lambda d: max(int(d * 1.0), 16)
with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
batch_norm_scale=True,
train_batch_norm=self._train_batch_norm):
with slim.arg_scope(
mobilenet_v1.mobilenet_v1_arg_scope(
is_training=self._train_batch_norm,
weight_decay=self._weight_decay)):
with slim.arg_scope(
[slim.conv2d, slim.separable_conv2d], padding='SAME'):
net = slim.separable_conv2d(