Commit f282f6ef authored by Alexander Gorban

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for feature map generators."""
import tensorflow as tf
from object_detection.models import feature_map_generators
INCEPTION_V2_LAYOUT = {
'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 256],
'anchor_strides': [16, 32, 64, -1, -1, -1],
'layer_target_norm': [20.0, -1, -1, -1, -1, -1],
}
INCEPTION_V3_LAYOUT = {
'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
'layer_depth': [-1, -1, -1, 512, 256, 128],
'anchor_strides': [16, 32, 64, -1, -1, -1],
'aspect_ratios': [1.0, 2.0, 1.0/2, 3.0, 1.0/3]
}
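# Layout convention (inferred from the expected shapes in the tests below, not
# from the generator's own documentation): an empty string in 'from_layer'
# means a new feature map is created on top of the previous one with a
# stride-2 3x3 convolution (hence names like 'Mixed_5c_2_Conv2d_3_3x3_s2_512'
# in the expected outputs), and a 'layer_depth' of -1 means the depth of the
# named source layer is kept as-is.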
# TODO: add tests with different anchor strides.
class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
def test_get_expected_feature_map_shapes_with_inception_v2(self):
image_features = {
'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=INCEPTION_V2_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
expected_feature_map_shapes = {
'Mixed_3c': (4, 28, 28, 256),
'Mixed_4c': (4, 14, 14, 576),
'Mixed_5c': (4, 7, 7, 1024),
'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v3(self):
image_features = {
'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=INCEPTION_V3_LAYOUT,
depth_multiplier=1,
min_depth=32,
insert_1x1_conv=True,
image_features=image_features)
expected_feature_map_shapes = {
'Mixed_5d': (4, 35, 35, 256),
'Mixed_6e': (4, 17, 17, 576),
'Mixed_7c': (4, 8, 8, 1024),
'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
out_feature_maps = sess.run(feature_maps)
out_feature_map_shapes = dict(
(key, value.shape) for key, value in out_feature_maps.items())
self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
class GetDepthFunctionTest(tf.test.TestCase):
def test_return_min_depth_when_multiplier_is_small(self):
depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
min_depth=16)
self.assertEqual(depth_fn(16), 16)
def test_return_correct_depth_with_multiplier(self):
depth_fn = feature_map_generators.get_depth_fn(depth_multiplier=0.5,
min_depth=16)
self.assertEqual(depth_fn(64), 32)
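# The two cases above are consistent with a depth function of the form
# max(int(depth * depth_multiplier), min_depth) -- a sketch of what
# feature_map_generators.get_depth_fn presumably returns, not a verbatim copy:
#   depth_fn(16) -> max(int(16 * 0.5), 16) = 16
#   depth_fn(64) -> max(int(64 * 0.5), 16) = 32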
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base test class SSDFeatureExtractors."""
from abc import abstractmethod
import numpy as np
import tensorflow as tf
class SsdFeatureExtractorTestBase(object):
def _validate_features_shape(self,
feature_extractor,
preprocessed_inputs,
expected_feature_map_shapes):
"""Checks the extracted features are of correct shape.
Args:
feature_extractor: The feature extractor to test.
preprocessed_inputs: A [batch, height, width, 3] tensor to extract
features with.
expected_feature_map_shapes: The expected shapes of the extracted features.
"""
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
feature_map_shapes = [tf.shape(feature_map) for feature_map in feature_maps]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
feature_map_shapes_out = sess.run(feature_map_shapes)
for shape_out, exp_shape_out in zip(
feature_map_shapes_out, expected_feature_map_shapes):
self.assertAllEqual(shape_out, exp_shape_out)
@abstractmethod
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
pass
def check_extract_features_returns_correct_shape(
self,
image_height,
image_width,
depth_multiplier,
expected_feature_map_shapes_out):
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.random_uniform(
[4, image_height, image_width, 3], dtype=tf.float32)
self._validate_features_shape(
feature_extractor, preprocessed_inputs, expected_feature_map_shapes_out)
def check_extract_features_raises_error_with_invalid_image_size(
self,
image_height,
image_width,
depth_multiplier):
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_maps = feature_extractor.extract_features(preprocessed_inputs)
test_preprocessed_image = np.random.rand(4, image_height, image_width, 3)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
with self.assertRaises(tf.errors.InvalidArgumentError):
sess.run(feature_maps,
feed_dict={preprocessed_inputs: test_preprocessed_image})
def check_feature_extractor_variables_under_scope(self,
depth_multiplier,
scope_name):
g = tf.Graph()
with g.as_default():
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
feature_extractor.extract_features(preprocessed_inputs)
variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
for variable in variables:
self.assertTrue(variable.name.startswith(scope_name))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for InceptionV2 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from nets import inception_v2
slim = tf.contrib.slim
class SSDInceptionV2FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using InceptionV2 features."""
def __init__(self,
depth_multiplier,
min_depth,
conv_hyperparams,
reuse_weights=None):
"""InceptionV2 Feature Extractor for SSD Models.
Args:
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDInceptionV2FeatureExtractor, self).__init__(
depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
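# Quick check of the mapping above: pixel value 0 -> -1.0, 127.5 -> 0.0 and
# 255 -> 1.0, i.e. inputs in [0, 255] land in [-1, 1] as the docstring states.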
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must be at least 33 in both height and width.'])
feature_map_layout = {
'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('InceptionV2',
reuse=self._reuse_weights) as scope:
_, image_features = inception_v2.inception_v2_base(
preprocessed_inputs,
final_endpoint='Mixed_5c',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
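# Usage sketch (mirrors the accompanying test rather than prescribing an API):
# conv_hyperparams is whatever slim.arg_scope accepts; the tests simply pass an
# empty dict.
#
#   extractor = SSDInceptionV2FeatureExtractor(
#       depth_multiplier=1.0, min_depth=32, conv_hyperparams={})
#   images = tf.placeholder(tf.float32, [4, 300, 300, 3])
#   feature_maps = extractor.extract_features(extractor.preprocess(images))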
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.ssd_inception_v2_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_inception_v2_feature_extractor
class SsdInceptionV2FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase,
tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a SsdInceptionV2FeatureExtractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_inception_v2_feature_extractor.SSDInceptionV2FeatureExtractor(
depth_multiplier, min_depth, conv_hyperparams)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 8, 8, 576), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 19, 19, 576), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(4, 19, 19, 128), (4, 10, 10, 128),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
scope_name = 'InceptionV2'
self.check_feature_extractor_variables_under_scope(depth_multiplier,
scope_name)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from nets import mobilenet_v1
slim = tf.contrib.slim
class SSDMobileNetV1FeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
"""SSD Feature Extractor using MobilenetV1 features."""
def __init__(self,
depth_multiplier,
min_depth,
conv_hyperparams,
reuse_weights=None):
"""MobileNetV1 Feature Extractor for SSD Models.
Args:
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
"""
super(SSDMobileNetV1FeatureExtractor, self).__init__(
depth_multiplier, min_depth, conv_hyperparams, reuse_weights)
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs.get_shape().assert_has_rank(4)
shape_assert = tf.Assert(
tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
['image size must be at least 33 in both height and width.'])
feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
}
with tf.control_dependencies([shape_assert]):
with slim.arg_scope(self._conv_hyperparams):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
_, image_features = mobilenet_v1.mobilenet_v1_base(
preprocessed_inputs,
final_endpoint='Conv2d_13_pointwise',
min_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
feature_maps = feature_map_generators.multi_resolution_feature_maps(
feature_map_layout=feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
image_features=image_features)
return feature_maps.values()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_feature_extractor."""
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_feature_extractor
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase, tf.test.TestCase):
def _create_feature_extractor(self, depth_multiplier):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
conv_hyperparams = {}
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
depth_multiplier, min_depth, conv_hyperparams)
def test_extract_features_returns_correct_shapes_128(self):
image_height = 128
image_width = 128
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 8, 8, 512), (4, 4, 4, 1024),
(4, 2, 2, 512), (4, 1, 1, 256),
(4, 1, 1, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_299(self):
image_height = 299
image_width = 299
depth_multiplier = 1.0
expected_feature_map_shape = [(4, 19, 19, 512), (4, 10, 10, 1024),
(4, 5, 5, 512), (4, 3, 3, 256),
(4, 2, 2, 256), (4, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
expected_feature_map_shape = [(4, 19, 19, 32), (4, 10, 10, 32),
(4, 5, 5, 32), (4, 3, 3, 32),
(4, 2, 2, 32), (4, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
image_height, image_width, depth_multiplier, expected_feature_map_shape)
def test_extract_features_raises_error_with_invalid_image_size(self):
image_height = 32
image_width = 32
depth_multiplier = 1.0
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier)
def test_preprocess_returns_correct_value_range(self):
image_height = 128
image_width = 128
depth_multiplier = 1
test_image = np.random.rand(4, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
depth_multiplier = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(depth_multiplier,
scope_name)
if __name__ == '__main__':
tf.test.main()
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Object Detection Demo\n",
"Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import os\n",
"import six.moves.urllib as urllib\n",
"import sys\n",
"import tarfile\n",
"import tensorflow as tf\n",
"import zipfile\n",
"\n",
"from collections import defaultdict\n",
"from io import StringIO\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Env setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# This is needed to display the images.\n",
"%matplotlib inline\n",
"\n",
"# This is needed since the notebook is stored in the object_detection folder.\n",
"sys.path.append(\"..\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Object detection imports\n",
"Here are the imports from the object detection module."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from utils import label_map_util\n",
"\n",
"from utils import visualization_utils as vis_util"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model preparation "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Variables\n",
"\n",
"Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file. \n",
"\n",
"By default we use an \"SSD with Mobilenet\" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# What model to download.\n",
"MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'\n",
"MODEL_FILE = MODEL_NAME + '.tar.gz'\n",
"DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n",
"\n",
"# Path to frozen detection graph. This is the actual model that is used for the object detection.\n",
"PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'\n",
"\n",
"# List of the strings that is used to add correct label for each box.\n",
"PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n",
"\n",
"NUM_CLASSES = 90"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"opener = urllib.request.URLopener()\n",
"opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n",
"tar_file = tarfile.open(MODEL_FILE)\n",
"for file in tar_file.getmembers():\n",
" file_name = os.path.basename(file.name)\n",
" if 'frozen_inference_graph.pb' in file_name:\n",
" tar_file.extract(file, os.getcwd())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load a (frozen) Tensorflow model into memory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"detection_graph = tf.Graph()\n",
"with detection_graph.as_default():\n",
" od_graph_def = tf.GraphDef()\n",
" with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:\n",
" serialized_graph = fid.read()\n",
" od_graph_def.ParseFromString(serialized_graph)\n",
" tf.import_graph_def(od_graph_def, name='')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading label map\n",
"Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n",
"categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n",
"category_index = label_map_util.create_category_index(categories)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper code"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def load_image_into_numpy_array(image):\n",
" (im_width, im_height) = image.size\n",
" return np.array(image.getdata()).reshape(\n",
" (im_height, im_width, 3)).astype(np.uint8)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Detection"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# For the sake of simplicity we will use only 2 images:\n",
"# image1.jpg\n",
"# image2.jpg\n",
"# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n",
"PATH_TO_TEST_IMAGES_DIR = 'test_images'\n",
"TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]\n",
"\n",
"# Size, in inches, of the output images.\n",
"IMAGE_SIZE = (12, 8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"with detection_graph.as_default():\n",
" with tf.Session(graph=detection_graph) as sess:\n",
" for image_path in TEST_IMAGE_PATHS:\n",
" image = Image.open(image_path)\n",
" # the array based representation of the image will be used later in order to prepare the\n",
" # result image with boxes and labels on it.\n",
" image_np = load_image_into_numpy_array(image)\n",
" # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
" image_np_expanded = np.expand_dims(image_np, axis=0)\n",
" image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
" # Each box represents a part of the image where a particular object was detected.\n",
" boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
" # Each score represent how level of confidence for each of the objects.\n",
" # Score is shown on the result image, together with the class label.\n",
" scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
" classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
" num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
" # Actual detection.\n",
" (boxes, scores, classes, num_detections) = sess.run(\n",
" [boxes, scores, classes, num_detections],\n",
" feed_dict={image_tensor: image_np_expanded})\n",
" # Visualization of the results of a detection.\n",
" vis_util.visualize_boxes_and_labels_on_image_array(\n",
" image_np,\n",
" np.squeeze(boxes),\n",
" np.squeeze(classes).astype(np.int32),\n",
" np.squeeze(scores),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" line_thickness=8)\n",
" plt.figure(figsize=IMAGE_SIZE)\n",
" plt.imshow(image_np)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# Tensorflow Object Detection API: Configuration protos.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
proto_library(
name = "argmax_matcher_proto",
srcs = ["argmax_matcher.proto"],
)
py_proto_library(
name = "argmax_matcher_py_pb2",
api_version = 2,
deps = [":argmax_matcher_proto"],
)
proto_library(
name = "bipartite_matcher_proto",
srcs = ["bipartite_matcher.proto"],
)
py_proto_library(
name = "bipartite_matcher_py_pb2",
api_version = 2,
deps = [":bipartite_matcher_proto"],
)
proto_library(
name = "matcher_proto",
srcs = ["matcher.proto"],
deps = [
":argmax_matcher_proto",
":bipartite_matcher_proto",
],
)
py_proto_library(
name = "matcher_py_pb2",
api_version = 2,
deps = [":matcher_proto"],
)
proto_library(
name = "faster_rcnn_box_coder_proto",
srcs = ["faster_rcnn_box_coder.proto"],
)
py_proto_library(
name = "faster_rcnn_box_coder_py_pb2",
api_version = 2,
deps = [":faster_rcnn_box_coder_proto"],
)
proto_library(
name = "mean_stddev_box_coder_proto",
srcs = ["mean_stddev_box_coder.proto"],
)
py_proto_library(
name = "mean_stddev_box_coder_py_pb2",
api_version = 2,
deps = [":mean_stddev_box_coder_proto"],
)
proto_library(
name = "square_box_coder_proto",
srcs = ["square_box_coder.proto"],
)
py_proto_library(
name = "square_box_coder_py_pb2",
api_version = 2,
deps = [":square_box_coder_proto"],
)
proto_library(
name = "box_coder_proto",
srcs = ["box_coder.proto"],
deps = [
":faster_rcnn_box_coder_proto",
":mean_stddev_box_coder_proto",
":square_box_coder_proto",
],
)
py_proto_library(
name = "box_coder_py_pb2",
api_version = 2,
deps = [":box_coder_proto"],
)
proto_library(
name = "grid_anchor_generator_proto",
srcs = ["grid_anchor_generator.proto"],
)
py_proto_library(
name = "grid_anchor_generator_py_pb2",
api_version = 2,
deps = [":grid_anchor_generator_proto"],
)
proto_library(
name = "ssd_anchor_generator_proto",
srcs = ["ssd_anchor_generator.proto"],
)
py_proto_library(
name = "ssd_anchor_generator_py_pb2",
api_version = 2,
deps = [":ssd_anchor_generator_proto"],
)
proto_library(
name = "anchor_generator_proto",
srcs = ["anchor_generator.proto"],
deps = [
":grid_anchor_generator_proto",
":ssd_anchor_generator_proto",
],
)
py_proto_library(
name = "anchor_generator_py_pb2",
api_version = 2,
deps = [":anchor_generator_proto"],
)
proto_library(
name = "input_reader_proto",
srcs = ["input_reader.proto"],
)
py_proto_library(
name = "input_reader_py_pb2",
api_version = 2,
deps = [":input_reader_proto"],
)
proto_library(
name = "losses_proto",
srcs = ["losses.proto"],
)
py_proto_library(
name = "losses_py_pb2",
api_version = 2,
deps = [":losses_proto"],
)
proto_library(
name = "optimizer_proto",
srcs = ["optimizer.proto"],
)
py_proto_library(
name = "optimizer_py_pb2",
api_version = 2,
deps = [":optimizer_proto"],
)
proto_library(
name = "post_processing_proto",
srcs = ["post_processing.proto"],
)
py_proto_library(
name = "post_processing_py_pb2",
api_version = 2,
deps = [":post_processing_proto"],
)
proto_library(
name = "hyperparams_proto",
srcs = ["hyperparams.proto"],
)
py_proto_library(
name = "hyperparams_py_pb2",
api_version = 2,
deps = [":hyperparams_proto"],
)
proto_library(
name = "box_predictor_proto",
srcs = ["box_predictor.proto"],
deps = [":hyperparams_proto"],
)
py_proto_library(
name = "box_predictor_py_pb2",
api_version = 2,
deps = [":box_predictor_proto"],
)
proto_library(
name = "region_similarity_calculator_proto",
srcs = ["region_similarity_calculator.proto"],
deps = [],
)
py_proto_library(
name = "region_similarity_calculator_py_pb2",
api_version = 2,
deps = [":region_similarity_calculator_proto"],
)
proto_library(
name = "preprocessor_proto",
srcs = ["preprocessor.proto"],
)
py_proto_library(
name = "preprocessor_py_pb2",
api_version = 2,
deps = [":preprocessor_proto"],
)
proto_library(
name = "train_proto",
srcs = ["train.proto"],
deps = [
":optimizer_proto",
":preprocessor_proto",
],
)
py_proto_library(
name = "train_py_pb2",
api_version = 2,
deps = [":train_proto"],
)
proto_library(
name = "eval_proto",
srcs = ["eval.proto"],
)
py_proto_library(
name = "eval_py_pb2",
api_version = 2,
deps = [":eval_proto"],
)
proto_library(
name = "image_resizer_proto",
srcs = ["image_resizer.proto"],
)
py_proto_library(
name = "image_resizer_py_pb2",
api_version = 2,
deps = [":image_resizer_proto"],
)
proto_library(
name = "faster_rcnn_proto",
srcs = ["faster_rcnn.proto"],
deps = [
":box_predictor_proto",
"//object_detection/protos:anchor_generator_proto",
"//object_detection/protos:hyperparams_proto",
"//object_detection/protos:image_resizer_proto",
"//object_detection/protos:losses_proto",
"//object_detection/protos:post_processing_proto",
],
)
proto_library(
name = "ssd_proto",
srcs = ["ssd.proto"],
deps = [
":anchor_generator_proto",
":box_coder_proto",
":box_predictor_proto",
":hyperparams_proto",
":image_resizer_proto",
":losses_proto",
":matcher_proto",
":post_processing_proto",
":region_similarity_calculator_proto",
],
)
proto_library(
name = "model_proto",
srcs = ["model.proto"],
deps = [
":faster_rcnn_proto",
":ssd_proto",
],
)
py_proto_library(
name = "model_py_pb2",
api_version = 2,
deps = [":model_proto"],
)
proto_library(
name = "pipeline_proto",
srcs = ["pipeline.proto"],
deps = [
":eval_proto",
":input_reader_proto",
":model_proto",
":train_proto",
],
)
py_proto_library(
name = "pipeline_py_pb2",
api_version = 2,
deps = [":pipeline_proto"],
)
proto_library(
name = "string_int_label_map_proto",
srcs = ["string_int_label_map.proto"],
)
py_proto_library(
name = "string_int_label_map_py_pb2",
api_version = 2,
deps = [":string_int_label_map_proto"],
)
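# Example (sketch): Python code elsewhere in the repository depends on the
# *_py_pb2 targets above via py_library deps such as ":pipeline_py_pb2" and
# imports the generated modules, e.g.
#   from object_detection.protos import pipeline_pb2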
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/grid_anchor_generator.proto";
import "object_detection/protos/ssd_anchor_generator.proto";
// Configuration proto for the anchor generator to use in the object detection
// pipeline. See core/anchor_generator.py for details.
message AnchorGenerator {
oneof anchor_generator_oneof {
GridAnchorGenerator grid_anchor_generator = 1;
SsdAnchorGenerator ssd_anchor_generator = 2;
}
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for ArgMaxMatcher. See
// matchers/argmax_matcher.py for details.
message ArgMaxMatcher {
// Threshold for positive matches.
optional float matched_threshold = 1 [default = 0.5];
// Threshold for negative matches.
optional float unmatched_threshold = 2 [default = 0.5];
// Whether to construct ArgMaxMatcher without thresholds.
optional bool ignore_thresholds = 3 [default = false];
// If True then negative matches are the ones below the unmatched_threshold,
// whereas ignored matches are in between the matched and unmatched
// threshold. If False, then negative matches are in between the matched
// and unmatched threshold, and everything lower than unmatched is ignored.
optional bool negatives_lower_than_unmatched = 4 [default = true];
// Whether to ensure each row is matched to at least one column.
optional bool force_match_for_each_row = 5 [default = false];
}
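// Concrete reading of the flags above (illustrative numbers): with
// matched_threshold = 0.7, unmatched_threshold = 0.3 and
// negatives_lower_than_unmatched = true, an anchor with IOU >= 0.7 is a
// match, IOU < 0.3 is a negative, and anything in between is ignored;
// flipping negatives_lower_than_unmatched swaps the negative and ignored
// bands.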
syntax = "proto2";
package object_detection.protos;
// Configuration proto for bipartite matcher. See
// matchers/bipartite_matcher.py for details.
message BipartiteMatcher {
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/faster_rcnn_box_coder.proto";
import "object_detection/protos/mean_stddev_box_coder.proto";
import "object_detection/protos/square_box_coder.proto";
// Configuration proto for the box coder to be used in the object detection
// pipeline. See core/box_coder.py for details.
message BoxCoder {
oneof box_coder_oneof {
FasterRcnnBoxCoder faster_rcnn_box_coder = 1;
MeanStddevBoxCoder mean_stddev_box_coder = 2;
SquareBoxCoder square_box_coder = 3;
}
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/hyperparams.proto";
// Configuration proto for box predictor. See core/box_predictor.py for details.
message BoxPredictor {
oneof box_predictor_oneof {
ConvolutionalBoxPredictor convolutional_box_predictor = 1;
MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
RfcnBoxPredictor rfcn_box_predictor = 3;
}
}
// Configuration proto for Convolutional box predictor.
message ConvolutionalBoxPredictor {
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 1;
// Minimum feature depth prior to predicting box encodings and class
// predictions.
optional int32 min_depth = 2 [default = 0];
// Maximum feature depth prior to predicting box encodings and class
// predictions. If max_depth is set to 0, no additional feature map will be
// inserted before location and class predictions.
optional int32 max_depth = 3 [default = 0];
// Number of additional conv layers before the predictor.
optional int32 num_layers_before_predictor = 4 [default = 0];
// Whether to use dropout for class prediction.
optional bool use_dropout = 5 [default = true];
// Keep probability for dropout
optional float dropout_keep_probability = 6 [default = 0.8];
// Size of final convolution kernel. If the spatial resolution of the feature
// map is smaller than the kernel size, then the kernel size is set to
// min(feature_width, feature_height).
optional int32 kernel_size = 7 [default = 1];
// Size of the encoding for boxes.
optional int32 box_code_size = 8 [default = 4];
// Whether to apply sigmoid to the output of class predictions.
// TODO: Do we need this since we have a post processing module?
optional bool apply_sigmoid_to_scores = 9 [default = false];
}
message MaskRCNNBoxPredictor {
// Hyperparameters for fully connected ops used in the box predictor.
optional Hyperparams fc_hyperparams = 1;
// Whether to use dropout op prior to the both box and class predictions.
optional bool use_dropout = 2 [default= false];
// Keep probability for dropout. This is only used if use_dropout is true.
optional float dropout_keep_probability = 3 [default = 0.5];
// Size of the encoding for the boxes.
optional int32 box_code_size = 4 [default = 4];
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 5;
// Whether to predict instance masks inside detection boxes.
optional bool predict_instance_masks = 6 [default = false];
// The depth for the first conv2d_transpose op applied to the
// image_features in the mask prediction branch.
optional int32 mask_prediction_conv_depth = 7 [default = 256];
// Whether to predict keypoints inside detection boxes.
optional bool predict_keypoints = 8 [default = false];
}
message RfcnBoxPredictor {
// Hyperparameters for convolution ops used in the box predictor.
optional Hyperparams conv_hyperparams = 1;
// Bin sizes for RFCN crops.
optional int32 num_spatial_bins_height = 2 [default = 3];
optional int32 num_spatial_bins_width = 3 [default = 3];
// Target depth to reduce the input image features to.
optional int32 depth = 4 [default=1024];
// Size of the encoding for the boxes.
optional int32 box_code_size = 5 [default = 4];
// Size to resize the rfcn crops to.
optional int32 crop_height = 6 [default= 12];
optional int32 crop_width = 7 [default=12];
}
syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
message EvalConfig {
// Number of visualization images to generate.
optional uint32 num_visualizations = 1 [default=10];
// Number of examples to process for evaluation.
optional uint32 num_examples = 2 [default=5000];
// How often to run evaluation.
optional uint32 eval_interval_secs = 3 [default=300];
// Maximum number of times to run evaluation. If set to 0, will run forever.
optional uint32 max_evals = 4 [default=0];
// Whether the TensorFlow graph used for evaluation should be saved to disk.
optional bool save_graph = 5 [default=false];
// Path to directory to store visualizations in. If empty, visualization
// images are not exported (only shown on Tensorboard).
optional string visualization_export_dir = 6 [default=""];
// BNS name of the TensorFlow master.
optional string eval_master = 7 [default=""];
// Type of metrics to use for evaluation. Currently supports only Pascal VOC
// detection metrics.
optional string metrics_set = 8 [default="pascal_voc_metrics"];
// Path to export detections to COCO compatible JSON format.
optional string export_path = 9 [default=""];
// Option to not read groundtruth labels and only export detections to
// COCO-compatible JSON file.
optional bool ignore_groundtruth = 10 [default=false];
// Use exponential moving averages of variables for evaluation.
// TODO: When this is false make sure the model is constructed
// without moving averages in restore_fn.
optional bool use_moving_averages = 11 [default=false];
// Whether to evaluate instance masks.
optional bool eval_instance_masks = 12 [default=false];
}
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for Faster R-CNN models.
// See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py
//
// Naming conventions:
// Faster R-CNN models have two stages: a first stage region proposal network
// (or RPN) and a second stage box classifier. We thus use the prefixes
// `first_stage_` and `second_stage_` to indicate the stage to which each
// parameter pertains when relevant.
message FasterRcnn {
// Whether to construct only the Region Proposal Network (RPN).
optional bool first_stage_only = 1 [default=false];
// Number of classes to predict.
optional int32 num_classes = 3;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 4;
// Feature extractor config.
optional FasterRcnnFeatureExtractor feature_extractor = 5;
// (First stage) region proposal network (RPN) parameters.
// Anchor generator to compute RPN anchors.
optional AnchorGenerator first_stage_anchor_generator = 6;
// Atrous rate for the convolution op applied to the
// `first_stage_features_to_crop` tensor to obtain box predictions.
optional int32 first_stage_atrous_rate = 7 [default=1];
// Hyperparameters for the convolutional RPN box predictor.
optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;
// Kernel size to use for the convolution op just prior to RPN box
// predictions.
optional int32 first_stage_box_predictor_kernel_size = 9 [default=3];
// Output depth for the convolution op just prior to RPN box predictions.
optional int32 first_stage_box_predictor_depth = 10 [default=512];
// The batch size to use for computing the first stage objectness and
// location losses.
optional int32 first_stage_minibatch_size = 11 [default=256];
// Fraction of positive examples per image for the RPN.
optional float first_stage_positive_balance_fraction = 12 [default=0.5];
// Non max suppression score threshold applied to first stage RPN proposals.
optional float first_stage_nms_score_threshold = 13 [default=0.0];
// Non max suppression IOU threshold applied to first stage RPN proposals.
optional float first_stage_nms_iou_threshold = 14 [default=0.7];
// Maximum number of RPN proposals retained after first stage postprocessing.
optional int32 first_stage_max_proposals = 15 [default=300];
// First stage RPN localization loss weight.
optional float first_stage_localization_loss_weight = 16 [default=1.0];
// First stage RPN objectness loss weight.
optional float first_stage_objectness_loss_weight = 17 [default=1.0];
// Per-region cropping parameters.
// Note that if an R-FCN model is constructed, the per-region cropping
// parameters below are ignored.
// Output size (width and height are set to be the same) of the initial
// bilinear interpolation based cropping during ROI pooling.
optional int32 initial_crop_size = 18;
// Kernel size of the max pool op on the cropped feature map during
// ROI pooling.
optional int32 maxpool_kernel_size = 19;
// Stride of the max pool op on the cropped feature map during ROI pooling.
optional int32 maxpool_stride = 20;
// (Second stage) box classifier parameters
// Hyperparameters for the second stage box predictor. If box predictor type
// is set to rfcn_box_predictor, an R-FCN model is constructed; otherwise a
// Faster R-CNN model is constructed.
optional BoxPredictor second_stage_box_predictor = 21;
// The batch size per image used for computing the classification and refined
// location loss of the box classifier.
// Note that this field is ignored if `hard_example_miner` is configured.
optional int32 second_stage_batch_size = 22 [default=64];
// Fraction of positive examples to use per image for the box classifier.
optional float second_stage_balance_fraction = 23 [default=0.25];
// Post processing to apply on the second stage box classifier predictions.
// Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
// is taken from this `second_stage_post_processing` proto.
optional PostProcessing second_stage_post_processing = 24;
// Second stage refined localization loss weight.
optional float second_stage_localization_loss_weight = 25 [default=1.0];
// Second stage classification loss weight
optional float second_stage_classification_loss_weight = 26 [default=1.0];
// If not left to default, applies hard example mining.
optional HardExampleMiner hard_example_miner = 27;
}
message FasterRcnnFeatureExtractor {
// Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
// See models/model_builder.py for expected types).
optional string type = 1;
// Output stride of extracted RPN feature map.
optional int32 first_stage_features_stride = 2 [default=16];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for FasterRCNNBoxCoder. See
// box_coders/faster_rcnn_box_coder.py for details.
message FasterRcnnBoxCoder {
// Scale factor for anchor encoded box center.
optional float y_scale = 1 [default = 10.0];
optional float x_scale = 2 [default = 10.0];
// Scale factor for anchor encoded box height.
optional float height_scale = 3 [default = 5.0];
// Scale factor for anchor encoded box width.
optional float width_scale = 4 [default = 5.0];
}
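// The four scales above multiply the encoded box targets. As a sketch of the
// encoding in box_coders/faster_rcnn_box_coder.py, given an anchor with
// center (xa, ya) and size (wa, ha) and a box with center (x, y) and size
// (w, h):
//   ty = y_scale * (y - ya) / ha
//   tx = x_scale * (x - xa) / wa
//   th = height_scale * log(h / ha)
//   tw = width_scale  * log(w / wa)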
syntax = "proto2";
package object_detection.protos;
// Configuration proto for GridAnchorGenerator. See
// anchor_generators/grid_anchor_generator.py for details.
message GridAnchorGenerator {
// Anchor height in pixels.
optional int32 height = 1 [default = 256];
// Anchor width in pixels.
optional int32 width = 2 [default = 256];
// Anchor stride in height dimension in pixels.
optional int32 height_stride = 3 [default = 16];
// Anchor stride in width dimension in pixels.
optional int32 width_stride = 4 [default = 16];
// Anchor height offset in pixels.
optional int32 height_offset = 5 [default = 0];
// Anchor width offset in pixels.
optional int32 width_offset = 6 [default = 0];
// At any given location, len(scales) * len(aspect_ratios) anchors are
// generated with all possible combinations of scales and aspect ratios.
// List of scales for the anchors.
repeated float scales = 7;
// List of aspect ratios for the anchors.
repeated float aspect_ratios = 8;
}
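// Example (sketch, not taken from a shipped config): three scales and three
// aspect ratios yield 3 * 3 = 9 anchors at every grid location:
//   grid_anchor_generator {
//     scales: [0.25, 0.5, 1.0]
//     aspect_ratios: [0.5, 1.0, 2.0]
//   }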
syntax = "proto2";
package object_detection.protos;
// Configuration proto for the convolution op hyperparameters to use in the
// object detection pipeline.
message Hyperparams {
// Operations affected by hyperparameters.
enum Op {
// Convolution, Separable Convolution, Convolution transpose.
CONV = 1;
// Fully connected
FC = 2;
}
optional Op op = 1 [default = CONV];
// Regularizer for the weights of the convolution op.
optional Regularizer regularizer = 2;
// Initializer for the weights of the convolution op.
optional Initializer initializer = 3;
// Type of activation to apply after convolution.
enum Activation {
// Use None (no activation)
NONE = 0;
// Use tf.nn.relu
RELU = 1;
// Use tf.nn.relu6
RELU_6 = 2;
}
optional Activation activation = 4 [default = RELU];
// BatchNorm hyperparameters. If this parameter is NOT set then BatchNorm is
// not applied!
optional BatchNorm batch_norm = 5;
}
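// A minimal example in config text format (values are illustrative, not taken
// from any shipped config):
//   op: CONV
//   regularizer { l2_regularizer { weight: 0.00004 } }
//   initializer { truncated_normal_initializer { stddev: 0.03 } }
//   activation: RELU_6
//   batch_norm { decay: 0.9997 epsilon: 0.001 }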
// Proto with one-of field for regularizers.
message Regularizer {
oneof regularizer_oneof {
L1Regularizer l1_regularizer = 1;
L2Regularizer l2_regularizer = 2;
}
}
// Configuration proto for L1 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l1_regularizer
message L1Regularizer {
optional float weight = 1 [default = 1.0];
}
// Configuration proto for L2 Regularizer.
// See https://www.tensorflow.org/api_docs/python/tf/contrib/layers/l2_regularizer
message L2Regularizer {
optional float weight = 1 [default = 1.0];
}
// Proto with one-of field for initializers.
message Initializer {
oneof initializer_oneof {
TruncatedNormalInitializer truncated_normal_initializer = 1;
VarianceScalingInitializer variance_scaling_initializer = 2;
}
}
// Configuration proto for truncated normal initializer. See
// https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
message TruncatedNormalInitializer {
optional float mean = 1 [default = 0.0];
optional float stddev = 2 [default = 1.0];
}
// Configuration proto for variance scaling initializer. See
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/
// variance_scaling_initializer
message VarianceScalingInitializer {
optional float factor = 1 [default = 2.0];
optional bool uniform = 2 [default = false];
enum Mode {
FAN_IN = 0;
FAN_OUT = 1;
FAN_AVG = 2;
}
optional Mode mode = 3 [default = FAN_IN];
}
// Configuration proto for batch norm to apply after convolution op. See
// https://www.tensorflow.org/api_docs/python/tf/contrib/layers/batch_norm
message BatchNorm {
optional float decay = 1 [default = 0.999];
optional bool center = 2 [default = true];
optional bool scale = 3 [default = false];
optional float epsilon = 4 [default = 0.001];
// Whether to train the batch norm variables. If this is set to false during
// training, the current values of the batch_norm variables are used for the
// forward pass but they are never updated.
optional bool train = 5 [default = true];
}
syntax = "proto2";
package object_detection.protos;
// Configuration proto for image resizing operations.
// See builders/image_resizer_builder.py for details.
message ImageResizer {
oneof image_resizer_oneof {
KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
FixedShapeResizer fixed_shape_resizer = 2;
}
}
// Configuration proto for image resizer that keeps aspect ratio.
message KeepAspectRatioResizer {
// Desired size of the smaller image dimension in pixels.
optional int32 min_dimension = 1 [default = 600];
// Desired size of the larger image dimension in pixels.
optional int32 max_dimension = 2 [default = 1024];
}
// Configuration proto for image resizer that resizes to a fixed shape.
message FixedShapeResizer {
// Desired height of image in pixels.
optional int32 height = 1 [default = 300];
// Desired width of image in pixels.
optional int32 width = 2 [default = 300];
}
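// Illustrative choices (values match the defaults above, not any particular
// shipped config): an SSD-style pipeline would typically use
//   fixed_shape_resizer { height: 300 width: 300 }
// while a Faster R-CNN-style pipeline would keep the aspect ratio with
//   keep_aspect_ratio_resizer { min_dimension: 600 max_dimension: 1024 }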