Object detection Internal Changes. (#4757)

* Merged commit includes the following changes: 204316992 by Zhichao Lu: Update docs to prepare inputs -- 204309254 by Zhichao Lu: Update running_pets.md to use new binaries and correct a few things in running_on_cloud.md -- 204306734 by Zhichao Lu: Move old binaries into legacy folder and add deprecation notice. -- 204267757 by Zhichao Lu: Fixing a problem in VRD evaluation with missing ground truth annotations for images that do not contain objects from 62 groundtruth classes. -- 204167430 by Zhichao Lu: This fixes a flaky losses test failure. -- 203670721 by Zhichao Lu: Internal change. -- 203569388 by Zhichao Lu: Internal change 203546580 by Zhichao Lu: * Expand TPU compatibility g3doc with config snippets * Change mscoco dataset path in sample configs to the sharded versions -- 203325694 by Zhichao Lu: Make merge_multiple_label_boxes work for model_main code path. -- 203305655 by Zhichao Lu: Remove the 1x1 conv layer before pooling in MobileNet-v1-PPN feature extractor. -- 203139608 by Zhichao Lu: - Support exponential_decay with burnin learning rate schedule. - Add the minimum learning rate option. - Make the exponential decay start only after the burnin steps. -- 203068703 by Zhichao Lu: Modify create_coco_tf_record.py to output sharded files. -- 203025308 by Zhichao Lu: Add an option to share the prediction tower in WeightSharedBoxPredictor. -- 203024942 by Zhichao Lu: Move ssd mobilenet v1 ppn configs to third party. -- 202901259 by Zhichao Lu: Delete obsolete ssd mobilenet v1 focal loss configs and update pets dataset path -- 202894154 by Zhichao Lu: Move all TPU compatible ssd mobilenet v1 coco14/pet configs to third party. -- 202861774 by Zhichao Lu: Move Retinanet (SSD + FPN + Shared box predictor) configs to third_party. -- PiperOrigin-RevId: 204316992 * Add original files back.

Object detection Internal Changes. (#4757)
* Merged commit includes the following changes: 204316992 by Zhichao Lu: Update docs to prepare inputs -- 204309254 by Zhichao Lu: Update running_pets.md to use new binaries and correct a few things in running_on_cloud.md -- 204306734 by Zhichao Lu: Move old binaries into legacy folder and add deprecation notice. -- 204267757 by Zhichao Lu: Fixing a problem in VRD evaluation with missing ground truth annotations for images that do not contain objects from 62 groundtruth classes. -- 204167430 by Zhichao Lu: This fixes a flaky losses test failure. -- 203670721 by Zhichao Lu: Internal change. -- 203569388 by Zhichao Lu: Internal change 203546580 by Zhichao Lu: * Expand TPU compatibility g3doc with config snippets * Change mscoco dataset path in sample configs to the sharded versions -- 203325694 by Zhichao Lu: Make merge_multiple_label_boxes work for model_main code path. -- 203305655 by Zhichao Lu: Remove the 1x1 conv layer before pooling in MobileNet-v1-PPN feature extractor. -- 203139608 by Zhichao Lu: - Support exponential_decay with burnin learning rate schedule. - Add the minimum learning rate option. - Make the exponential decay start only after the burnin steps. -- 203068703 by Zhichao Lu: Modify create_coco_tf_record.py to output sharded files. -- 203025308 by Zhichao Lu: Add an option to share the prediction tower in WeightSharedBoxPredictor. -- 203024942 by Zhichao Lu: Move ssd mobilenet v1 ppn configs to third party. -- 202901259 by Zhichao Lu: Delete obsolete ssd mobilenet v1 focal loss configs and update pets dataset path -- 202894154 by Zhichao Lu: Move all TPU compatible ssd mobilenet v1 coco14/pet configs to third party. -- 202861774 by Zhichao Lu: Move Retinanet (SSD + FPN + Shared box predictor) configs to third_party. -- PiperOrigin-RevId: 204316992 * Add original files back.
70255908 · pkulzc · GitHub · ee6fdda1 · 70255908 · 70255908
Unverified Commit 70255908 authored Jul 12, 2018 by pkulzc Committed by GitHub Jul 12, 2018
20 changed files
--- a/research/object_detection/train.py
+++ b/research/object_detection/train.py
@@ -46,10 +46,10 @@ import json
 import os
 import tensorflow as tf

-from object_detection import trainer
 from object_detection.builders import dataset_builder
 from object_detection.builders import graph_rewriter_builder
 from object_detection.builders import model_builder
+from object_detection.legacy import trainer
 from object_detection.utils import config_util

 tf.logging.set_verbosity(tf.logging.INFO)
@@ -84,6 +84,7 @@ flags.DEFINE_string('model_config_path', '',
 FLAGS = flags.FLAGS


+@tf.contrib.framework.deprecated(None, 'Use object_detection/model_main.py.')
 def main(_):
  assert FLAGS.train_dir, '`train_dir` is missing.'
  if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir)

--- a/research/object_detection/trainer.py
+++ b/research/object_detection/trainer.py
--- a/research/object_detection/trainer_test.py
+++ b/research/object_detection/trainer_test.py
@@ -19,10 +19,10 @@ import tensorflow as tf

 from google.protobuf import text_format

-from object_detection import trainer
 from object_detection.core import losses
 from object_detection.core import model
 from object_detection.core import standard_fields as fields
+from object_detection.legacy import trainer
 from object_detection.protos import train_pb2



--- a/research/object_detection/metrics/offline_eval_map_corloc.py
+++ b/research/object_detection/metrics/offline_eval_map_corloc.py
@@ -36,8 +36,8 @@ import os
 import re
 import tensorflow as tf

-from object_detection import evaluator
 from object_detection.core import standard_fields
+from object_detection.legacy import evaluator
 from object_detection.metrics import tf_example_parser
 from object_detection.utils import config_util
 from object_detection.utils import label_map_util

--- a/research/object_detection/models/feature_map_generators.py
+++ b/research/object_detection/models/feature_map_generators.py
@@ -223,3 +223,69 @@ def fpn_top_down_feature_maps(image_features, depth, scope=None):
        output_feature_map_keys.append('top_down_%s' % image_features[level][0])
      return collections.OrderedDict(
          reversed(zip(output_feature_map_keys, output_feature_maps_list)))
+
+
+def pooling_pyramid_feature_maps(base_feature_map_depth, num_layers,
+                                 image_features):
+  """Generates pooling pyramid feature maps.
+
+  The pooling pyramid feature maps are motivated by
+  multi_resolution_feature_maps. The main differences are that this generator
+  is simpler and reduces the number of free parameters.
+
+  More specifically:
+   - Instead of using convolutions to shrink the feature map, it uses max
+     pooling, and therefore gets rid of the convolution parameters entirely.
+   - By pooling features from the larger map up to a single cell, it generates
+     features in the same feature space.
+   - Instead of independently making box predictions from individual maps, it
+     shares the same classifier across different feature maps, which reduces
+     the "mis-calibration" across different scales.
+
+  See go/ppn-detection for more details.
+
+  Args:
+    base_feature_map_depth: Depth of the base feature before the max pooling.
+      When this is not greater than 0, the 1x1 convolution is skipped and the
+      input feature is used directly as the base feature map.
+    num_layers: Number of layers used to make predictions. They are pooled
+      from the base feature.
+    image_features: A dictionary of handles to activation tensors from the
+      feature extractor. It must contain exactly one entry.
+
+  Returns:
+    feature_maps: an OrderedDict mapping keys (feature map names) to
+      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
+      The first entry is the base feature map, followed by num_layers - 1
+      max-pooled maps.
+  Raises:
+    ValueError: image_features does not contain exactly one entry
+  """
+  if len(image_features) != 1:
+    raise ValueError('image_features should be a dictionary of length 1.')
+  # dict.keys() is a non-indexable view on Python 3, so fetch the single value
+  # through an iterator; this behaves the same on Python 2.
+  image_features = next(iter(image_features.values()))
+
+  feature_map_keys = []
+  feature_maps = []
+  feature_map_key = 'Base_Conv2d_1x1_%d' % base_feature_map_depth
+  if base_feature_map_depth > 0:
+    image_features = slim.conv2d(
+        image_features,
+        base_feature_map_depth,
+        [1, 1],  # kernel size
+        padding='SAME', stride=1, scope=feature_map_key)
+    # Add a 1x1 max-pooling node (a no op node) immediately after the conv2d for
+    # TPU v1 compatibility.  Without the following dummy op, TPU runtime
+    # compiler will combine the convolution with one max-pooling below into a
+    # single cycle, so getting the conv2d feature becomes impossible.
+    image_features = slim.max_pool2d(
+        image_features, [1, 1], padding='SAME', stride=1, scope=feature_map_key)
+  # The base map is recorded under the 'Base_Conv2d_1x1_<depth>' key even when
+  # the convolution above was skipped.
+  feature_map_keys.append(feature_map_key)
+  feature_maps.append(image_features)
+  feature_map = image_features
+  with slim.arg_scope([slim.max_pool2d], padding='SAME', stride=2):
+    # Each iteration pools the previous map, so the maps form a chain.
+    for i in range(num_layers - 1):
+      feature_map_key = 'MaxPool2d_%d_2x2' % i
+      feature_map = slim.max_pool2d(
+          feature_map, [2, 2], padding='SAME', scope=feature_map_key)
+      feature_map_keys.append(feature_map_key)
+      feature_maps.append(feature_map)
+  return collections.OrderedDict(zip(feature_map_keys, feature_maps))
+
--- a/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""SSD MobilenetV1 FPN Feature Extractor."""
+
+import tensorflow as tf
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from nets import mobilenet_v1
+
+slim = tf.contrib.slim
+
+
+class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
+  """SSD feature extractor that builds FPN features on top of MobilenetV1."""
+
+  def preprocess(self, resized_inputs):
+    """Rescales image pixel values for the MobilenetV1 backbone.
+
+    Applies the affine map x -> (2 / 255) * x - 1, which sends values in
+    [0, 255] to the range [-1, 1].
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    return (2.0 / 255.0) * resized_inputs - 1.0
+
+  def extract_features(self, preprocessed_inputs):
+    """Builds the MobilenetV1 backbone and the FPN feature maps on top of it.
+
+    Three top-down FPN maps are generated from the Conv2d_5, Conv2d_11 and
+    Conv2d_13 pointwise endpoints, followed by two extra maps obtained by
+    applying stride-2 3x3 convolutions to the coarsest top-down map.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      feature_maps: a list of five tensors where the ith tensor has shape
+        [batch, height_i, width_i, depth_i]
+    """
+    preprocessed_inputs = shape_utils.check_min_image_dim(
+        33, preprocessed_inputs)
+
+    def scaled_depth(depth):
+      """Applies the depth multiplier, never going below the minimum depth."""
+      return max(int(depth * self._depth_multiplier), self._min_depth)
+
+    backbone_arg_scope = mobilenet_v1.mobilenet_v1_arg_scope(
+        is_training=None, regularize_depthwise=True)
+    with tf.variable_scope('MobilenetV1',
+                           reuse=self._reuse_weights) as scope:
+      with slim.arg_scope(backbone_arg_scope):
+        # The override scope is created inside the MobileNet arg_scope, so the
+        # hyperparams callable runs under the same enclosing scopes as before.
+        if self._override_base_feature_extractor_hyperparams:
+          override_scope = slim.arg_scope(self._conv_hyperparams_fn())
+        else:
+          override_scope = context_manager.IdentityContextManager()
+        with override_scope:
+          padded_inputs = ops.pad_to_multiple(
+              preprocessed_inputs, self._pad_to_multiple)
+          _, image_features = mobilenet_v1.mobilenet_v1_base(
+              padded_inputs,
+              final_endpoint='Conv2d_13_pointwise',
+              min_depth=self._min_depth,
+              depth_multiplier=self._depth_multiplier,
+              use_explicit_padding=self._use_explicit_padding,
+              scope=scope)
+
+      with slim.arg_scope(self._conv_hyperparams_fn()):
+        with tf.variable_scope('fpn', reuse=self._reuse_weights):
+          backbone_pairs = []
+          for endpoint in ['Conv2d_5_pointwise', 'Conv2d_11_pointwise',
+                           'Conv2d_13_pointwise']:
+            backbone_pairs.append((endpoint, image_features[endpoint]))
+          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
+              backbone_pairs, depth=scaled_depth(256))
+
+          # Extend the pyramid downwards: every extra map is a stride-2
+          # convolution of the previous (coarser) one.
+          bottom_up_maps = []
+          coarsest = fpn_features['top_down_Conv2d_13_pointwise']
+          for i in range(14, 16):
+            coarsest = slim.conv2d(
+                coarsest,
+                num_outputs=scaled_depth(256),
+                kernel_size=[3, 3],
+                stride=2,
+                padding='SAME',
+                scope='bottom_up_Conv2d_{}'.format(i))
+            bottom_up_maps.append(coarsest)
+    return [fpn_features['top_down_Conv2d_5_pointwise'],
+            fpn_features['top_down_Conv2d_11_pointwise'],
+            fpn_features['top_down_Conv2d_13_pointwise']] + bottom_up_maps
--- a/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor_test.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for ssd_mobilenet_v1_fpn_feature_extractor."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_mobilenet_v1_fpn_feature_extractor
+
+slim = tf.contrib.slim
+
+
+class SsdMobilenetV1FpnFeatureExtractorTest(
+    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+  """Tests for the SSD MobilenetV1 FPN feature extractor."""
+
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                is_training=True, use_explicit_padding=False):
+    """Constructs a new feature extractor.
+
+    Args:
+      depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      is_training: whether the network is in training mode.
+      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+        inputs so that the output dimensions are the same as if 'SAME' padding
+        were used.
+    Returns:
+      an ssd_meta_arch.SSDFeatureExtractor object.
+    """
+    min_depth = 32
+    return (ssd_mobilenet_v1_fpn_feature_extractor.
+            SSDMobileNetV1FpnFeatureExtractor(
+                is_training,
+                depth_multiplier,
+                min_depth,
+                pad_to_multiple,
+                self.conv_hyperparams_fn,
+                use_explicit_padding=use_explicit_padding))
+
+  def _check_shapes_for_both_padding_modes(
+      self, check_fn, image_size, depth_multiplier, pad_to_multiple,
+      expected_feature_map_shape):
+    """Runs a shape checker without and then with explicit padding.
+
+    Args:
+      check_fn: one of the shape-checking methods of the test base class.
+      image_size: height and width of the square test images.
+      depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      expected_feature_map_shape: list of expected feature map shapes.
+    """
+    batch_size = 2
+    for use_explicit_padding in (False, True):
+      check_fn(
+          batch_size, image_size, image_size, depth_multiplier,
+          pad_to_multiple, expected_feature_map_shape,
+          use_explicit_padding=use_explicit_padding)
+
+  def test_extract_features_returns_correct_shapes_256(self):
+    self._check_shapes_for_both_padding_modes(
+        self.check_extract_features_returns_correct_shape,
+        image_size=256,
+        depth_multiplier=1.0,
+        pad_to_multiple=1,
+        expected_feature_map_shape=[(2, 32, 32, 256), (2, 16, 16, 256),
+                                    (2, 8, 8, 256), (2, 4, 4, 256),
+                                    (2, 2, 2, 256)])
+
+  # Renamed from ..._384: the test has always used 320x320 inputs and the
+  # expected shapes below correspond to that size.
+  def test_extract_features_returns_correct_shapes_320(self):
+    self._check_shapes_for_both_padding_modes(
+        self.check_extract_features_returns_correct_shape,
+        image_size=320,
+        depth_multiplier=1.0,
+        pad_to_multiple=1,
+        expected_feature_map_shape=[(2, 40, 40, 256), (2, 20, 20, 256),
+                                    (2, 10, 10, 256), (2, 5, 5, 256),
+                                    (2, 3, 3, 256)])
+
+  def test_extract_features_with_dynamic_image_shape(self):
+    self._check_shapes_for_both_padding_modes(
+        self.check_extract_features_returns_correct_shapes_with_dynamic_inputs,
+        image_size=256,
+        depth_multiplier=1.0,
+        pad_to_multiple=1,
+        expected_feature_map_shape=[(2, 32, 32, 256), (2, 16, 16, 256),
+                                    (2, 8, 8, 256), (2, 4, 4, 256),
+                                    (2, 2, 2, 256)])
+
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+    # 299 is padded up to 320, so the shapes match the 320x320 case.
+    self._check_shapes_for_both_padding_modes(
+        self.check_extract_features_returns_correct_shape,
+        image_size=299,
+        depth_multiplier=1.0,
+        pad_to_multiple=32,
+        expected_feature_map_shape=[(2, 40, 40, 256), (2, 20, 20, 256),
+                                    (2, 10, 10, 256), (2, 5, 5, 256),
+                                    (2, 3, 3, 256)])
+
+  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
+    # The tiny multiplier forces every depth down to min_depth (32).
+    self._check_shapes_for_both_padding_modes(
+        self.check_extract_features_returns_correct_shape,
+        image_size=256,
+        depth_multiplier=0.5**12,
+        pad_to_multiple=1,
+        expected_feature_map_shape=[(2, 32, 32, 32), (2, 16, 16, 32),
+                                    (2, 8, 8, 32), (2, 4, 4, 32),
+                                    (2, 2, 2, 32)])
+
+  def test_extract_features_raises_error_with_invalid_image_size(self):
+    image_height = 32
+    image_width = 32
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    self.check_extract_features_raises_error_with_invalid_image_size(
+        image_height, image_width, depth_multiplier, pad_to_multiple)
+
+  def test_preprocess_returns_correct_value_range(self):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    test_image = np.random.rand(2, image_height, image_width, 3)
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
+    preprocessed_image = feature_extractor.preprocess(test_image)
+    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
+
+  def test_variables_only_created_in_scope(self):
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    scope_name = 'MobilenetV1'
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, scope_name)
+
+  def test_fused_batchnorm(self):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    image_placeholder = tf.placeholder(tf.float32,
+                                       [1, image_height, image_width, 3])
+    feature_extractor = self._create_feature_extractor(depth_multiplier,
+                                                       pad_to_multiple)
+    preprocessed_image = feature_extractor.preprocess(image_placeholder)
+    _ = feature_extractor.extract_features(preprocessed_image)
+    # Building the graph must have produced at least one fused batch norm op.
+    self.assertTrue(
+        any(op.type == 'FusedBatchNorm'
+            for op in tf.get_default_graph().get_operations()))
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""SSDFeatureExtractor for MobilenetV1 PPN features."""
+
+import tensorflow as tf
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from nets import mobilenet_v1
+
+slim = tf.contrib.slim
+
+
+class SSDMobileNetV1PpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
+  """SSD Feature Extractor using MobilenetV1 PPN features."""
+
+  def preprocess(self, resized_inputs):
+    """SSD preprocessing.
+
+    Maps pixel values to the range [-1, 1].
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    return (2.0 / 255.0) * resized_inputs - 1.0
+
+  def extract_features(self, preprocessed_inputs):
+    """Extract features from preprocessed inputs.
+
+    Builds the MobilenetV1 backbone and derives six pooling pyramid feature
+    maps from its Conv2d_11_pointwise endpoint.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      feature_maps: a list of tensors where the ith tensor has shape
+        [batch, height_i, width_i, depth_i]
+    """
+    preprocessed_inputs = shape_utils.check_min_image_dim(
+        33, preprocessed_inputs)
+
+    with tf.variable_scope('MobilenetV1',
+                           reuse=self._reuse_weights) as scope:
+      with slim.arg_scope(
+          mobilenet_v1.mobilenet_v1_arg_scope(
+              is_training=None, regularize_depthwise=True)):
+        with (slim.arg_scope(self._conv_hyperparams_fn())
+              if self._override_base_feature_extractor_hyperparams
+              else context_manager.IdentityContextManager()):
+          # NOTE: the backbone is built through Conv2d_13_pointwise although
+          # only the Conv2d_11_pointwise endpoint is consumed below.
+          _, image_features = mobilenet_v1.mobilenet_v1_base(
+              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
+              final_endpoint='Conv2d_13_pointwise',
+              min_depth=self._min_depth,
+              depth_multiplier=self._depth_multiplier,
+              use_explicit_padding=self._use_explicit_padding,
+              scope=scope)
+      with slim.arg_scope(self._conv_hyperparams_fn()):
+        # base_feature_map_depth=0 skips the extra 1x1 convolution, so the
+        # backbone endpoint itself is the first (finest) feature map.
+        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
+            base_feature_map_depth=0,
+            num_layers=6,
+            image_features={
+                'image_features': image_features['Conv2d_11_pointwise']
+            })
+    # OrderedDict.values() is only a view on Python 3; materialize it so the
+    # documented list return type holds on both Python 2 and Python 3.
+    return list(feature_maps.values())
--- a/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_mobilenet_v1_ppn_feature_extractor_test.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for ssd_mobilenet_v1_ppn_feature_extractor."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.models import ssd_feature_extractor_test
+from object_detection.models import ssd_mobilenet_v1_ppn_feature_extractor
+
+slim = tf.contrib.slim
+
+
+class SsdMobilenetV1PpnFeatureExtractorTest(
+    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                is_training=True, use_explicit_padding=False):
+    """Constructs a new feature extractor.
+
+    Args:
+      depth_multiplier: float depth multiplier for feature extractor
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      is_training: whether the network is in training mode.
+      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+        inputs so that the output dimensions are the same as if 'SAME' padding
+        were used.
+    Returns:
+      an ssd_meta_arch.SSDFeatureExtractor object.
+    """
+    min_depth = 32
+    return (ssd_mobilenet_v1_ppn_feature_extractor.
+            SSDMobileNetV1PpnFeatureExtractor(
+                is_training,
+                depth_multiplier,
+                min_depth,
+                pad_to_multiple,
+                self.conv_hyperparams_fn,
+                use_explicit_padding=use_explicit_padding))
+
+  def test_extract_features_returns_correct_shapes_320(self):
+    image_height = 320
+    image_width = 320
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512), (2, 1, 1, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_300(self):
+    image_height = 300
+    image_width = 300
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 19, 19, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512), (2, 1, 1, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_640(self):
+    image_height = 640
+    image_width = 640
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 40, 40, 512), (2, 20, 20, 512),
+                                  (2, 10, 10, 512), (2, 5, 5, 512),
+                                  (2, 3, 3, 512), (2, 2, 2, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_with_dynamic_image_shape(self):
+    image_height = 320
+    image_width = 320
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512), (2, 1, 1, 512)]
+    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    pad_to_multiple = 32
+    expected_feature_map_shape = [(2, 20, 20, 512), (2, 10, 10, 512),
+                                  (2, 5, 5, 512), (2, 3, 3, 512),
+                                  (2, 2, 2, 512)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
+    image_height = 256
+    image_width = 256
+    depth_multiplier = 0.5**12
+    pad_to_multiple = 1
+    expected_feature_map_shape = [(2, 16, 16, 32), (2, 8, 8, 32),
+                                  (2, 4, 4, 32), (2, 2, 2, 32),
+                                  (2, 1, 1, 32)]
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=False)
+    self.check_extract_features_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier, pad_to_multiple,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_extract_features_raises_error_with_invalid_image_size(self):
+    """Runs the shared invalid-image-size check with a 32x32 input."""
+    side = 32
+    depth_multiplier, pad_to_multiple = 1.0, 1
+    self.check_extract_features_raises_error_with_invalid_image_size(
+        side, side, depth_multiplier, pad_to_multiple)
+
+  def test_preprocess_returns_correct_value_range(self):
+    """Checks that every preprocessed value has magnitude at most 1.0."""
+    height, width = 128, 128
+    depth_multiplier, pad_to_multiple = 1, 1
+    images = np.random.rand(2, height, width, 3)
+    extractor = self._create_feature_extractor(depth_multiplier,
+                                               pad_to_multiple)
+    magnitudes = np.abs(extractor.preprocess(images))
+    within_unit_range = np.less_equal(magnitudes, 1.0)
+    self.assertTrue(np.all(within_unit_range))
+
+  def test_variables_only_created_in_scope(self):
+    """Runs the shared variable-scope check against 'MobilenetV1'."""
+    depth_multiplier, pad_to_multiple = 1, 1
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, 'MobilenetV1')
+
+  def test_has_fused_batchnorm(self):
+    """Checks that building the extractor adds a 'FusedBatchNorm' op."""
+    height = width = 320
+    depth_multiplier, pad_to_multiple = 1, 1
+    images = tf.placeholder(tf.float32, [1, height, width, 3])
+    extractor = self._create_feature_extractor(depth_multiplier,
+                                               pad_to_multiple)
+    # Only the graph construction matters here; the outputs are discarded.
+    extractor.extract_features(extractor.preprocess(images))
+    op_types = [op.type for op in tf.get_default_graph().get_operations()]
+    self.assertTrue('FusedBatchNorm' in op_types)
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor.py
+++ b/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""SSD feature extractors based on Resnet v1 and PPN architectures."""
+
+import tensorflow as tf
+
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import feature_map_generators
+from object_detection.utils import context_manager
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+from nets import resnet_v1
+
+slim = tf.contrib.slim
+
+
+class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
+  """SSD feature extractor based on resnet architecture and PPN.
+
+  Runs a Resnet v1 base network and builds pooling pyramid feature maps on top
+  of its 'block3' activations.
+  """
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               resnet_base_fn,
+               resnet_scope_name,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               base_feature_map_depth=1024,
+               num_layers=6,
+               override_base_feature_extractor_hyperparams=False,
+               use_bounded_activations=False):
+    """Resnet based PPN Feature Extractor for SSD Models.
+
+    See go/pooling-pyramid for more details about PPN.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor. Only 1.0
+        is accepted by `extract_features`.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      resnet_base_fn: base resnet network to use.
+      resnet_scope_name: scope name to construct resnet.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      base_feature_map_depth: Depth of the base feature before the max pooling.
+      num_layers: Number of layers used to make predictions. They are pooled
+        from the base feature.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+      use_bounded_activations: Whether or not to use bounded activations for
+        resnet v1 bottleneck residual unit. Bounded activations better lend
+        themselves to quantized inference.
+    """
+    super(_SSDResnetPpnFeatureExtractor, self).__init__(
+        is_training, depth_multiplier, min_depth, pad_to_multiple,
+        conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise,
+        override_base_feature_extractor_hyperparams)
+    self._resnet_base_fn = resnet_base_fn
+    self._resnet_scope_name = resnet_scope_name
+    self._base_feature_map_depth = base_feature_map_depth
+    self._num_layers = num_layers
+    self._use_bounded_activations = use_bounded_activations
+
+  def _filter_features(self, image_features):
+    """Selects the block2/block3/block4 endpoints, keyed by short name.
+
+    Args:
+      image_features: a dict mapping endpoint names (possibly prefixed with
+        '/'-separated scopes) to activations.
+
+    Returns:
+      A dict mapping the last '/'-separated component of each retained key
+      ('block2', 'block3' or 'block4') to its activation.
+    """
+    # TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
+    # of munging the scope here.
+    filtered_image_features = {}
+    for key, feature in image_features.items():
+      feature_name = key.split('/')[-1]
+      if feature_name in ('block2', 'block3', 'block4'):
+        filtered_image_features[feature_name] = feature
+    return filtered_image_features
+
+  def preprocess(self, resized_inputs):
+    """SSD preprocessing.
+
+    VGG style channel mean subtraction as described here:
+    https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md.
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    channel_means = [123.68, 116.779, 103.939]
+    # The extra nesting gives the means shape [1, 1, 3] so that they broadcast
+    # over the batch, height and width dimensions.
+    return resized_inputs - [[channel_means]]
+
+  def extract_features(self, preprocessed_inputs):
+    """Extract features from preprocessed inputs.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      feature_maps: a list of tensors where the ith tensor has shape
+        [batch, height_i, width_i, depth_i]
+
+    Raises:
+      ValueError: depth multiplier is not supported.
+    """
+    if self._depth_multiplier != 1.0:
+      raise ValueError('Depth multiplier not supported.')
+
+    preprocessed_inputs = shape_utils.check_min_image_dim(
+        129, preprocessed_inputs)
+
+    with tf.variable_scope(
+        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
+      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
+        # Optionally layer the caller's conv hyperparams over the resnet
+        # defaults; otherwise this context is a no-op.
+        with (slim.arg_scope(self._conv_hyperparams_fn())
+              if self._override_base_feature_extractor_hyperparams else
+              context_manager.IdentityContextManager()):
+          with slim.arg_scope(
+              [resnet_v1.bottleneck],
+              use_bounded_activations=self._use_bounded_activations):
+            # NOTE(review): is_training=None presumably defers the batch norm
+            # mode to the enclosing arg_scope -- confirm against
+            # nets.resnet_v1.
+            _, activations = self._resnet_base_fn(
+                inputs=ops.pad_to_multiple(preprocessed_inputs,
+                                           self._pad_to_multiple),
+                num_classes=None,
+                is_training=None,
+                global_pool=False,
+                output_stride=None,
+                store_non_strided_activations=True,
+                scope=scope)
+
+      with slim.arg_scope(self._conv_hyperparams_fn()):
+        # Only the 'block3' endpoint feeds the pooling pyramid.
+        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
+            base_feature_map_depth=self._base_feature_map_depth,
+            num_layers=self._num_layers,
+            image_features={
+                'image_features': self._filter_features(activations)['block3']
+            })
+    # Materialize the values so that a real list is returned on Python 3 too,
+    # where dict.values() is a view rather than a list.
+    return list(feature_maps.values())
+
+
+class SSDResnet50V1PpnFeatureExtractor(_SSDResnetPpnFeatureExtractor):
+  """PPN feature extractor for SSD built on the Resnet50 v1 base network."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               override_base_feature_extractor_hyperparams=False):
+    """Builds the extractor with `resnet_v1.resnet_v1_50` as the base network.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+    """
+    override = override_base_feature_extractor_hyperparams
+    super(SSDResnet50V1PpnFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=conv_hyperparams_fn,
+        resnet_base_fn=resnet_v1.resnet_v1_50,
+        resnet_scope_name='resnet_v1_50',
+        reuse_weights=reuse_weights,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=override)
+
+
+class SSDResnet101V1PpnFeatureExtractor(_SSDResnetPpnFeatureExtractor):
+  """PPN feature extractor for SSD built on the Resnet101 v1 base network."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               override_base_feature_extractor_hyperparams=False):
+    """Builds the extractor with `resnet_v1.resnet_v1_101` as the base network.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+    """
+    override = override_base_feature_extractor_hyperparams
+    super(SSDResnet101V1PpnFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=conv_hyperparams_fn,
+        resnet_base_fn=resnet_v1.resnet_v1_101,
+        resnet_scope_name='resnet_v1_101',
+        reuse_weights=reuse_weights,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=override)
+
+
+class SSDResnet152V1PpnFeatureExtractor(_SSDResnetPpnFeatureExtractor):
+  """PPN feature extractor for SSD built on the Resnet152 v1 base network."""
+
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams_fn,
+               reuse_weights=None,
+               use_explicit_padding=False,
+               use_depthwise=False,
+               override_base_feature_extractor_hyperparams=False):
+    """Builds the extractor with `resnet_v1.resnet_v1_152` as the base network.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
+        and separable_conv2d ops in the layers that are added on top of the
+        base feature extractor.
+      reuse_weights: Whether to reuse variables. Default is None.
+      use_explicit_padding: Whether to use explicit padding when extracting
+        features. Default is False.
+      use_depthwise: Whether to use depthwise convolutions. Default is False.
+      override_base_feature_extractor_hyperparams: Whether to override
+        hyperparameters of the base feature extractor with the one from
+        `conv_hyperparams_fn`.
+    """
+    override = override_base_feature_extractor_hyperparams
+    super(SSDResnet152V1PpnFeatureExtractor, self).__init__(
+        is_training=is_training,
+        depth_multiplier=depth_multiplier,
+        min_depth=min_depth,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=conv_hyperparams_fn,
+        resnet_base_fn=resnet_v1.resnet_v1_152,
+        resnet_scope_name='resnet_v1_152',
+        reuse_weights=reuse_weights,
+        use_explicit_padding=use_explicit_padding,
+        use_depthwise=use_depthwise,
+        override_base_feature_extractor_hyperparams=override)
--- a/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_test.py
+++ b/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_test.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for SSD Resnet v1 PPN feature extractors."""
+import tensorflow as tf
+
+from object_detection.models import ssd_resnet_v1_ppn_feature_extractor
+from object_detection.models import ssd_resnet_v1_ppn_feature_extractor_testbase
+
+
+class SSDResnet50V1PpnFeatureExtractorTest(
+    ssd_resnet_v1_ppn_feature_extractor_testbase.
+    SSDResnetPpnFeatureExtractorTestBase):
+  """Runs the shared PPN test cases against the Resnet50 v1 extractor."""
+
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                use_explicit_padding=False):
+    """Returns a training-mode Resnet50 v1 PPN extractor with min_depth 32."""
+    extractor_cls = (
+        ssd_resnet_v1_ppn_feature_extractor.SSDResnet50V1PpnFeatureExtractor)
+    return extractor_cls(
+        is_training=True,
+        depth_multiplier=depth_multiplier,
+        min_depth=32,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=self.conv_hyperparams_fn,
+        use_explicit_padding=use_explicit_padding)
+
+  def _scope_name(self):
+    """Returns the scope name passed to the variables-under-scope check."""
+    return 'resnet_v1_50'
+
+
+class SSDResnet101V1PpnFeatureExtractorTest(
+    ssd_resnet_v1_ppn_feature_extractor_testbase.
+    SSDResnetPpnFeatureExtractorTestBase):
+  """Runs the shared PPN test cases against the Resnet101 v1 extractor."""
+
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                use_explicit_padding=False):
+    """Returns a training-mode Resnet101 v1 PPN extractor with min_depth 32."""
+    extractor_cls = (
+        ssd_resnet_v1_ppn_feature_extractor.SSDResnet101V1PpnFeatureExtractor)
+    return extractor_cls(
+        is_training=True,
+        depth_multiplier=depth_multiplier,
+        min_depth=32,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=self.conv_hyperparams_fn,
+        use_explicit_padding=use_explicit_padding)
+
+  def _scope_name(self):
+    """Returns the scope name passed to the variables-under-scope check."""
+    return 'resnet_v1_101'
+
+
+class SSDResnet152V1PpnFeatureExtractorTest(
+    ssd_resnet_v1_ppn_feature_extractor_testbase.
+    SSDResnetPpnFeatureExtractorTestBase):
+  """Runs the shared PPN test cases against the Resnet152 v1 extractor."""
+
+  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
+                                use_explicit_padding=False):
+    """Returns a training-mode Resnet152 v1 PPN extractor with min_depth 32."""
+    extractor_cls = (
+        ssd_resnet_v1_ppn_feature_extractor.SSDResnet152V1PpnFeatureExtractor)
+    return extractor_cls(
+        is_training=True,
+        depth_multiplier=depth_multiplier,
+        min_depth=32,
+        pad_to_multiple=pad_to_multiple,
+        conv_hyperparams_fn=self.conv_hyperparams_fn,
+        use_explicit_padding=use_explicit_padding)
+
+  def _scope_name(self):
+    """Returns the scope name passed to the variables-under-scope check."""
+    return 'resnet_v1_152'
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py
+++ b/research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Shared test base for SSD Resnet v1 PPN feature extractors."""
+import abc
+import numpy as np
+
+from object_detection.models import ssd_feature_extractor_test
+
+
+class SSDResnetPpnFeatureExtractorTestBase(
+    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
+  """Shared test cases for SSD Resnet PPN feature extractors.
+
+  Concrete test classes provide `_create_feature_extractor` and `_scope_name`.
+  """
+
+  @abc.abstractmethod
+  def _scope_name(self):
+    """Returns the scope name used by the variables-under-scope test."""
+    pass
+
+  def test_extract_features_returns_correct_shapes_289(self):
+    """Checks the six feature map shapes produced for 289x289 inputs."""
+    batch_size = 2
+    height = width = 289
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_shapes = [(batch_size, size, size, 1024)
+                       for size in (19, 10, 5, 3, 2, 1)]
+    self.check_extract_features_returns_correct_shape(
+        batch_size, height, width, depth_multiplier, pad_to_multiple,
+        expected_shapes)
+
+  def test_extract_features_returns_correct_shapes_with_dynamic_inputs(self):
+    """Checks the same six shapes through the dynamic-inputs helper."""
+    batch_size = 2
+    height = width = 289
+    depth_multiplier = 1.0
+    pad_to_multiple = 1
+    expected_shapes = [(batch_size, size, size, 1024)
+                       for size in (19, 10, 5, 3, 2, 1)]
+    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
+        batch_size, height, width, depth_multiplier, pad_to_multiple,
+        expected_shapes)
+
+  def test_extract_features_raises_error_with_invalid_image_size(self):
+    """Runs the shared invalid-image-size check with a 32x32 input."""
+    side = 32
+    depth_multiplier, pad_to_multiple = 1.0, 1
+    self.check_extract_features_raises_error_with_invalid_image_size(
+        side, side, depth_multiplier, pad_to_multiple)
+
+  def test_preprocess_returns_correct_value_range(self):
+    """Checks that preprocess subtracts the per-channel means."""
+    height, width = 128, 128
+    depth_multiplier, pad_to_multiple = 1, 1
+    images = np.random.rand(4, height, width, 3)
+    extractor = self._create_feature_extractor(depth_multiplier,
+                                               pad_to_multiple)
+    actual = extractor.preprocess(images)
+    expected = images - [[123.68, 116.779, 103.939]]
+    self.assertAllClose(actual, expected)
+
+  def test_variables_only_created_in_scope(self):
+    """Runs the shared variable-scope check against `_scope_name()`."""
+    depth_multiplier, pad_to_multiple = 1, 1
+    self.check_feature_extractor_variables_under_scope(
+        depth_multiplier, pad_to_multiple, self._scope_name())
--- a/research/object_detection/protos/box_predictor.proto
+++ b/research/object_detection/protos/box_predictor.proto
@@ -4,14 +4,14 @@ package object_detection.protos;

 import "object_detection/protos/hyperparams.proto";

-
 // Configuration proto for box predictor. See core/box_predictor.py for details.
 message BoxPredictor {
  oneof box_predictor_oneof {
    ConvolutionalBoxPredictor convolutional_box_predictor = 1;
    MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
    RfcnBoxPredictor rfcn_box_predictor = 3;
-    WeightSharedConvolutionalBoxPredictor weight_shared_convolutional_box_predictor = 4;
+    WeightSharedConvolutionalBoxPredictor
+        weight_shared_convolutional_box_predictor = 4;
  }
 }

@@ -82,11 +82,15 @@ message WeightSharedConvolutionalBoxPredictor {
  // https://arxiv.org/abs/1708.02002 for details.
  optional float class_prediction_bias_init = 10 [default = 0.0];

-   // Whether to use dropout for class prediction.
+  // Whether to use dropout for class prediction.
  optional bool use_dropout = 11 [default = false];

  // Keep probability for dropout
  optional float dropout_keep_probability = 12 [default = 0.8];
+
+  // Whether to share the multi-layer tower between box prediction and class
+  // prediction heads.
+  optional bool share_prediction_tower = 13 [default = false];
 }

 message MaskRCNNBoxPredictor {
@@ -94,7 +98,7 @@ message MaskRCNNBoxPredictor {
  optional Hyperparams fc_hyperparams = 1;

  // Whether to use dropout op prior to the both box and class predictions.
-  optional bool use_dropout = 2 [default= false];
+  optional bool use_dropout = 2 [default = false];

  // Keep probability for dropout. This is only used if use_dropout is true.
  optional float dropout_keep_probability = 3 [default = 0.5];
@@ -141,13 +145,13 @@ message RfcnBoxPredictor {
  optional int32 num_spatial_bins_width = 3 [default = 3];

  // Target depth to reduce the input image features to.
-  optional int32 depth = 4 [default=1024];
+  optional int32 depth = 4 [default = 1024];

  // Size of the encoding for the boxes.
  optional int32 box_code_size = 5 [default = 4];

  // Size to resize the rfcn crops to.
-  optional int32 crop_height = 6 [default= 12];
+  optional int32 crop_height = 6 [default = 12];

-  optional int32 crop_width = 7 [default=12];
+  optional int32 crop_width = 7 [default = 12];
 }
--- a/research/object_detection/protos/optimizer.proto
+++ b/research/object_detection/protos/optimizer.proto
@@ -61,6 +61,9 @@ message ExponentialDecayLearningRate {
  optional uint32 decay_steps = 2 [default = 4000000];
  optional float decay_factor = 3 [default = 0.95];
  optional bool staircase = 4 [default = true];
+  optional float burnin_learning_rate = 5 [default = 0.0];
+  optional uint32 burnin_steps =  6 [default = 0];
+  optional float min_learning_rate =  7 [default = 0.0];
 }

 // Configuration message for a manually defined learning rate schedule.

--- a/research/object_detection/samples/cloud/cloud.yml
+++ b/research/object_detection/samples/cloud/cloud.yml
 trainingInput:
-  runtimeVersion: "1.0"
+  runtimeVersion: "1.8"
  scaleTier: CUSTOM
  masterType: standard_gpu
  workerCount: 5

--- a/research/object_detection/samples/configs/embedded_ssd_mobilenet_v1_coco.config
+++ b/research/object_detection/samples/configs/embedded_ssd_mobilenet_v1_coco.config
@@ -166,7 +166,7 @@ train_config: {

 train_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
 }
@@ -178,7 +178,7 @@ eval_config: {

 eval_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
  shuffle: false

--- a/research/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config
+++ b/research/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_coco.config
@@ -120,7 +120,7 @@ train_config: {

 train_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
 }
@@ -134,7 +134,7 @@ eval_config: {

 eval_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
  shuffle: false

--- a/research/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_cosine_lr_coco.config
+++ b/research/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_cosine_lr_coco.config
@@ -110,7 +110,7 @@ train_config: {

 train_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
 }
@@ -124,7 +124,7 @@ eval_config: {

 eval_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
  shuffle: false

--- a/research/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config
+++ b/research/object_detection/samples/configs/faster_rcnn_inception_resnet_v2_atrous_pets.config
@@ -121,7 +121,7 @@ train_config: {

 train_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-?????"
+    input_path: "PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-00010"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
 }
@@ -133,7 +133,7 @@ eval_config: {

 eval_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-?????"
+    input_path: "PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-00010"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
  shuffle: false

--- a/research/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config
+++ b/research/object_detection/samples/configs/faster_rcnn_inception_v2_coco.config
@@ -119,7 +119,7 @@ train_config: {

 train_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
 }
@@ -133,7 +133,7 @@ eval_config: {

 eval_input_reader: {
  tf_record_input_reader {
-    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record"
+    input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
  }
  label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
  shuffle: false