Commit 05584085 authored by pkulzc's avatar pkulzc Committed by Jonathan Huang

Merged commit includes the following changes: (#6315)

236813471  by lzc:

    Internal change.

--
236507310  by lzc:

    Fix preprocess.random_resize_method config type issue. The target height
    and width are passed as "size" to tf.image.resize_images, which accepts
    only integers.
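
    For context, a minimal sketch of the constraint behind this fix
    (illustrative values, not the repo's preprocessing code):

    import tensorflow as tf

    image = tf.random_uniform([1, 480, 640, 3])
    # tf.image.resize_images requires `size` to be integer-valued, which is
    # why RandomResizeMethod's target fields change from float to int32 below.
    target_height, target_width = 320, 320
    resized = tf.image.resize_images(
        image, size=[target_height, target_width],
        method=tf.image.ResizeMethod.BILINEAR)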

--
236409989  by Zhichao Lu:

    Config export_to_tpu from function parameter instead of HParams for TPU inference.

--
236403186  by Zhichao Lu:

    Make graph file names optional arguments.

--
236237072  by Zhichao Lu:

    Minor bugfix for keyword args.

--
236209602  by Zhichao Lu:

    Add support for PartitionedVariable to get_variables_available_in_checkpoint.
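
    For illustration, a hedged sketch of what such support involves (the
    helper below is hypothetical, not the repo's actual code): a
    PartitionedVariable is iterable over its concrete partition variables, so
    name-based checkpoint matching has to expand it first.

    import tensorflow as tf

    def expand_partitioned(variables):
      # Hypothetical helper: flatten each PartitionedVariable into its
      # underlying partition variables before comparing names and shapes
      # against the checkpoint's contents.
      flattened = []
      for variable in variables:
        if isinstance(variable, tf.PartitionedVariable):
          flattened.extend(list(variable))  # iteration yields the partitions
        else:
          flattened.append(variable)
      return flattened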

--
235828658  by Zhichao Lu:

    Automatically stop evaluation jobs when training is finished.

--
235817964  by Zhichao Lu:

    Add an optional process_metrics_fn callback to eval_util; it is called
    with the evaluation results each time an evaluation completes.
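
    A hypothetical example of such a callback (the exact signature is not
    shown in this log; assumed here to receive a dict of metric names to
    values):

    import tensorflow as tf

    def process_metrics_fn(eval_metrics):
      # Log every metric once an evaluation round completes.
      for name, value in sorted(eval_metrics.items()):
        tf.logging.info('%s: %f', name, value)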

--
235788721  by lzc:

    Fix yml file tf runtime...
parent a5db4420
@@ -29,7 +29,7 @@ from object_detection.utils import test_case

 class SsdFeatureExtractorTestBase(test_case.TestCase):

-  def _build_conv_hyperparams(self):
+  def _build_conv_hyperparams(self, add_batch_norm=True):
     conv_hyperparams = hyperparams_pb2.Hyperparams()
     conv_hyperparams_text_proto = """
       activation: RELU_6
@@ -41,10 +41,14 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
         truncated_normal_initializer {
         }
       }
-      batch_norm {
-        scale: false
-      }
     """
+    if add_batch_norm:
+      batch_norm_proto = """
+        batch_norm {
+          scale: false
+        }
+      """
+      conv_hyperparams_text_proto += batch_norm_proto
     text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
     return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
......
@@ -13,41 +13,69 @@
 # limitations under the License.
 # ==============================================================================

-"""Tests for ssd_mobilenet_v1_feature_extractor."""
+"""Tests for SSD Mobilenet V1 feature extractors.
+
+By using parameterized test decorator, this test serves for both Slim-based and
+Keras-based Mobilenet V1 feature extractors in SSD.
+"""
+from absl.testing import parameterized
+
 import numpy as np
 import tensorflow as tf

 from object_detection.models import ssd_feature_extractor_test
 from object_detection.models import ssd_mobilenet_v1_feature_extractor
+from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor

 slim = tf.contrib.slim


+@parameterized.parameters(
+    {'use_keras': False},
+    {'use_keras': True},
+)
 class SsdMobilenetV1FeatureExtractorTest(
     ssd_feature_extractor_test.SsdFeatureExtractorTestBase):

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                is_training=True, use_explicit_padding=False):
+                                use_explicit_padding=False, is_training=False,
+                                use_keras=False):
     """Constructs a new feature extractor.

     Args:
       depth_multiplier: float depth multiplier for feature extractor
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
-      is_training: whether the network is in training mode.
       use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
         inputs so that the output dimensions are the same as if 'SAME' padding
         were used.
+      is_training: whether the network is in training mode.
+      use_keras: if True builds a keras-based feature extractor, if False builds
+        a slim-based one.

     Returns:
       an ssd_meta_arch.SSDFeatureExtractor object.
     """
     min_depth = 32
-    return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
-        is_training, depth_multiplier, min_depth, pad_to_multiple,
-        self.conv_hyperparams_fn,
-        use_explicit_padding=use_explicit_padding)
-
-  def test_extract_features_returns_correct_shapes_128(self):
+    if use_keras:
+      return (ssd_mobilenet_v1_keras_feature_extractor.
+              SSDMobileNetV1KerasFeatureExtractor(
+                  is_training=is_training,
+                  depth_multiplier=depth_multiplier,
+                  min_depth=min_depth,
+                  pad_to_multiple=pad_to_multiple,
+                  conv_hyperparams=self._build_conv_hyperparams(
+                      add_batch_norm=False),
+                  freeze_batchnorm=False,
+                  inplace_batchnorm_update=False,
+                  use_explicit_padding=use_explicit_padding,
+                  name='MobilenetV1'))
+    else:
+      return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
+          is_training, depth_multiplier, min_depth, pad_to_multiple,
+          self.conv_hyperparams_fn,
+          use_explicit_padding=use_explicit_padding)
+
+  def test_extract_features_returns_correct_shapes_128(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
@@ -57,12 +85,14 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 1, 1, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_299(self):
+  def test_extract_features_returns_correct_shapes_299(self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
@@ -72,12 +102,14 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 2, 2, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_with_dynamic_image_shape(self):
+  def test_extract_features_with_dynamic_image_shape(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1.0
@@ -87,12 +119,15 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 1, 1, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
+  def test_extract_features_returns_correct_shapes_enforcing_min_depth(
+      self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 0.5**12
@@ -102,12 +137,15 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 2, 2, 32), (2, 1, 1, 32)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
+  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
+      self, use_keras):
     image_height = 299
     image_width = 299
     depth_multiplier = 1.0
@@ -117,48 +155,63 @@ class SsdMobilenetV1FeatureExtractorTest(
                                   (2, 2, 2, 256), (2, 1, 1, 128)]
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=False)
+        expected_feature_map_shape, use_explicit_padding=False,
+        use_keras=use_keras)
     self.check_extract_features_returns_correct_shape(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
-        expected_feature_map_shape, use_explicit_padding=True)
+        expected_feature_map_shape, use_explicit_padding=True,
+        use_keras=use_keras)

-  def test_extract_features_raises_error_with_invalid_image_size(self):
+  def test_extract_features_raises_error_with_invalid_image_size(
+      self, use_keras):
     image_height = 32
     image_width = 32
     depth_multiplier = 1.0
     pad_to_multiple = 1
     self.check_extract_features_raises_error_with_invalid_image_size(
-        image_height, image_width, depth_multiplier, pad_to_multiple)
+        image_height, image_width, depth_multiplier, pad_to_multiple,
+        use_keras=use_keras)

-  def test_preprocess_returns_correct_value_range(self):
+  def test_preprocess_returns_correct_value_range(self, use_keras):
     image_height = 128
     image_width = 128
     depth_multiplier = 1
     pad_to_multiple = 1
     test_image = np.random.rand(2, image_height, image_width, 3)
     feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                       pad_to_multiple)
+                                                       pad_to_multiple,
+                                                       use_keras=use_keras)
     preprocessed_image = feature_extractor.preprocess(test_image)
     self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

-  def test_variables_only_created_in_scope(self):
+  def test_variables_only_created_in_scope(self, use_keras):
     depth_multiplier = 1
     pad_to_multiple = 1
     scope_name = 'MobilenetV1'
     self.check_feature_extractor_variables_under_scope(
-        depth_multiplier, pad_to_multiple, scope_name)
+        depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)

-  def test_has_fused_batchnorm(self):
+  def test_variable_count(self, use_keras):
+    depth_multiplier = 1
+    pad_to_multiple = 1
+    variables = self.get_feature_extractor_variables(
+        depth_multiplier, pad_to_multiple, use_keras=use_keras)
+    self.assertEqual(len(variables), 151)
+
+  def test_has_fused_batchnorm(self, use_keras):
     image_height = 40
     image_width = 40
     depth_multiplier = 1
     pad_to_multiple = 1
     image_placeholder = tf.placeholder(tf.float32,
                                        [1, image_height, image_width, 3])
-    feature_extractor = self._create_feature_extractor(depth_multiplier,
-                                                       pad_to_multiple)
+    feature_extractor = self._create_feature_extractor(
+        depth_multiplier, pad_to_multiple, use_keras=use_keras)
     preprocessed_image = feature_extractor.preprocess(image_placeholder)
-    _ = feature_extractor.extract_features(preprocessed_image)
+    if use_keras:
+      _ = feature_extractor(preprocessed_image)
+    else:
+      _ = feature_extractor.extract_features(preprocessed_image)
     self.assertTrue(any(op.type == 'FusedBatchNorm'
                         for op in tf.get_default_graph().get_operations()))
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for Keras MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models.keras_models import mobilenet_v1
from object_detection.utils import ops
from object_detection.utils import shape_utils
slim = tf.contrib.slim
class SSDMobileNetV1KerasFeatureExtractor(
ssd_meta_arch.SSDKerasFeatureExtractor):
"""SSD Feature Extractor using Keras MobilenetV1 features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False,
name=None):
"""Keras MobileNetV1 Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(SSDMobileNetV1KerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
self._feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
self._mobilenet_v1 = None
self._feature_map_generator = None
def build(self, input_shape):
full_mobilenet_v1 = mobilenet_v1.mobilenet_v1(
batchnorm_training=(self._is_training and not self._freeze_batchnorm),
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=self._use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_11_relu').output
conv2d_13_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_13_relu').output
self._mobilenet_v1 = tf.keras.Model(
inputs=full_mobilenet_v1.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self._feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=self._feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
name='FeatureMaps'))
self.built = True
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
image_features = self._mobilenet_v1(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_maps = self._feature_map_generator({
'Conv2d_11_pointwise': image_features[0],
'Conv2d_13_pointwise': image_features[1]})
return feature_maps.values()
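
As a usage sketch, mirroring how the parameterized test earlier in this commit
constructs this extractor (here `conv_hyperparams` is assumed to be a
hyperparams_builder.KerasLayerHyperparams object and `images` a float image
batch; neither name comes from this file):

extractor = (ssd_mobilenet_v1_keras_feature_extractor.
             SSDMobileNetV1KerasFeatureExtractor(
                 is_training=False,
                 depth_multiplier=1.0,
                 min_depth=32,
                 pad_to_multiple=1,
                 conv_hyperparams=conv_hyperparams,
                 freeze_batchnorm=False,
                 inplace_batchnorm_update=False,
                 name='MobilenetV1'))
preprocessed = extractor.preprocess(images)  # scales pixels to [-1, 1]
feature_maps = extractor(preprocessed)       # Keras extractors are callable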
@@ -19,7 +19,7 @@ import tensorflow as tf

 from object_detection.meta_architectures import ssd_meta_arch
 from object_detection.models import feature_map_generators
-from object_detection.models.keras_applications import mobilenet_v2
+from object_detection.models.keras_models import mobilenet_v2
 from object_detection.utils import ops
 from object_detection.utils import shape_utils
......
@@ -53,8 +53,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -96,9 +95,6 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         use_depthwise=use_depthwise,
         override_base_feature_extractor_hyperparams=
         override_base_feature_extractor_hyperparams)
-    if self._depth_multiplier != 1.0:
-      raise ValueError('Only depth 1.0 is supported, found: {}'.
-                       format(self._depth_multiplier))
     if self._use_explicit_padding is True:
       raise ValueError('Explicit padding is not a valid option.')
     self._resnet_base_fn = resnet_base_fn
@@ -150,13 +146,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     Returns:
       feature_maps: a list of tensors where the ith tensor has shape
         [batch, height_i, width_i, depth_i]
-
-    Raises:
-      ValueError: depth multiplier is not supported.
     """
-    if self._depth_multiplier != 1.0:
-      raise ValueError('Depth multiplier not supported.')
-
     preprocessed_inputs = shape_utils.check_min_image_dim(
         129, preprocessed_inputs)
@@ -174,8 +164,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           global_pool=False,
           output_stride=None,
           store_non_strided_activations=True,
+          min_base_depth=self._min_depth,
+          depth_multiplier=self._depth_multiplier,
           scope=scope)
       image_features = self._filter_features(image_features)
+    depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
     with slim.arg_scope(self._conv_hyperparams_fn()):
       with tf.variable_scope(self._fpn_scope_name,
                              reuse=self._reuse_weights):
@@ -185,7 +178,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           feature_block_list.append('block{}'.format(level - 1))
         fpn_features = feature_map_generators.fpn_top_down_feature_maps(
             [(key, image_features[key]) for key in feature_block_list],
-            depth=self._additional_layer_depth)
+            depth=depth_fn(self._additional_layer_depth))
         feature_maps = []
         for level in range(self._fpn_min_level, base_fpn_max_level + 1):
           feature_maps.append(
@@ -196,7 +189,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
         for i in range(base_fpn_max_level, self._fpn_max_level):
           last_feature_map = slim.conv2d(
               last_feature_map,
-              num_outputs=self._additional_layer_depth,
+              num_outputs=depth_fn(self._additional_layer_depth),
               kernel_size=[3, 3],
              stride=2,
              padding='SAME',
@@ -226,8 +219,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -284,8 +276,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -342,8 +333,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
     Args:
       is_training: whether the network is in training mode.
       depth_multiplier: float depth multiplier for feature extractor.
-        UNUSED currently.
-      min_depth: minimum feature extractor depth. UNUSED Currently.
+      min_depth: minimum feature extractor depth.
       pad_to_multiple: the nearest multiple to zero pad the input height and
         width dimensions to.
       conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
......
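
A worked example of the depth_fn arithmetic introduced above, using the values
the new FPN tests later in this diff exercise:

depth_fn = lambda d, depth_multiplier, min_depth: max(
    int(d * depth_multiplier), min_depth)
assert depth_fn(256, depth_multiplier=0.5, min_depth=32) == 128   # scaled down
assert depth_fn(256, depth_multiplier=1.0, min_depth=320) == 320  # floor wins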
@@ -25,8 +25,7 @@ class SSDResnet50V1FeatureExtractorTest(
   """SSDResnet50v1Fpn feature extractor test."""

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
-    min_depth = 32
+                                use_explicit_padding=False, min_depth=32):
     is_training = True
     return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
         is_training, depth_multiplier, min_depth, pad_to_multiple,
@@ -42,8 +41,7 @@ class SSDResnet101V1FeatureExtractorTest(
   """SSDResnet101v1Fpn feature extractor test."""

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
-    min_depth = 32
+                                use_explicit_padding=False, min_depth=32):
     is_training = True
     return (
         ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
@@ -64,8 +62,7 @@ class SSDResnet152V1FeatureExtractorTest(
   """SSDResnet152v1Fpn feature extractor test."""

   def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
-                                use_explicit_padding=False):
-    min_depth = 32
+                                use_explicit_padding=False, min_depth=32):
     is_training = True
     return (
         ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
......
@@ -14,6 +14,7 @@
 # ==============================================================================

 """Tests for ssd resnet v1 FPN feature extractors."""
 import abc
+import itertools

 import numpy as np
 import tensorflow as tf
@@ -32,6 +33,14 @@ class SSDResnetFPNFeatureExtractorTestBase(
   def _fpn_scope_name(self):
     return 'fpn'

+  @abc.abstractmethod
+  def _create_feature_extractor(self,
+                                depth_multiplier,
+                                pad_to_multiple,
+                                use_explicit_padding=False,
+                                min_depth=32):
+    pass
+
   def test_extract_features_returns_correct_shapes_256(self):
     image_height = 256
     image_width = 256
@@ -56,6 +65,45 @@ class SSDResnetFPNFeatureExtractorTestBase(
         2, image_height, image_width, depth_multiplier, pad_to_multiple,
         expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_depth_multiplier(self):
image_height = 256
image_width = 256
depth_multiplier = 0.5
expected_num_channels = int(256 * depth_multiplier)
pad_to_multiple = 1
expected_feature_map_shape = [(2, 32, 32, expected_num_channels),
(2, 16, 16, expected_num_channels),
(2, 8, 8, expected_num_channels),
(2, 4, 4, expected_num_channels),
(2, 2, 2, expected_num_channels)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_min_depth(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
min_depth = 320
expected_feature_map_shape = [(2, 32, 32, min_depth),
(2, 16, 16, min_depth),
(2, 8, 8, min_depth),
(2, 4, 4, min_depth),
(2, 2, 2, min_depth)]
def graph_fn(image_tensor):
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, min_depth=min_depth)
return feature_extractor.extract_features(image_tensor)
image_tensor = np.random.rand(2, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shape):
self.assertAllEqual(feature_map.shape, expected_shape)
   def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
     image_height = 254
     image_width = 254
......
@@ -54,8 +54,8 @@
     "sys.path.append(\"..\")\n",
     "from object_detection.utils import ops as utils_ops\n",
     "\n",
-    "if StrictVersion(tf.__version__) \u003c StrictVersion('1.9.0'):\n",
-    "  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')\n"
+    "if StrictVersion(tf.__version__) \u003c StrictVersion('1.12.0'):\n",
+    "  raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n"
    ]
   },
   {
......
@@ -108,14 +108,16 @@ class ConvolutionalBoxPredictor(box_predictor.BoxPredictor):
         feature map.

     Returns:
-      box_encodings: A list of float tensors of shape
-        [batch_size, num_anchors_i, q, code_size] representing the location of
-        the objects, where q is 1 or the number of classes. Each entry in the
-        list corresponds to a feature map in the input `image_features` list.
-      class_predictions_with_background: A list of float tensors of shape
-        [batch_size, num_anchors_i, num_classes + 1] representing the class
-        predictions for the proposals. Each entry in the list corresponds to a
-        feature map in the input `image_features` list.
+      A dictionary containing:
+        box_encodings: A list of float tensors of shape
+          [batch_size, num_anchors_i, q, code_size] representing the location of
+          the objects, where q is 1 or the number of classes. Each entry in the
+          list corresponds to a feature map in the input `image_features` list.
+        class_predictions_with_background: A list of float tensors of shape
+          [batch_size, num_anchors_i, num_classes + 1] representing the class
+          predictions for the proposals. Each entry in the list corresponds to a
+          feature map in the input `image_features` list.
+        (optional) Predictions from other heads.
     """
     predictions = {
         BOX_ENCODINGS: [],
@@ -226,8 +228,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
       kernel_size: Size of final convolution kernel.
       apply_batch_norm: Whether to apply batch normalization to conv layers in
         this predictor.
-      share_prediction_tower: Whether to share the multi-layer tower between box
-        prediction and class prediction heads.
+      share_prediction_tower: Whether to share the multi-layer tower among box
+        prediction head, class prediction head and other heads.
       use_depthwise: Whether to use depthwise separable conv2d instead of
         regular conv2d.
     """
@@ -270,9 +272,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
       inserted_layer_counter += 1
     return image_feature, inserted_layer_counter

-  def _compute_base_tower(self, tower_name_scope, image_feature, feature_index,
-                          has_different_feature_channels, target_channel,
-                          inserted_layer_counter):
+  def _compute_base_tower(self, tower_name_scope, image_feature, feature_index):
     net = image_feature
     for i in range(self._num_layers_before_predictor):
       if self._use_depthwise:
@@ -296,23 +296,18 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
     return net

   def _predict_head(self, head_name, head_obj, image_feature, box_tower_feature,
-                    feature_index, has_different_feature_channels,
-                    target_channel, inserted_layer_counter,
-                    num_predictions_per_location):
+                    feature_index, num_predictions_per_location):
     if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
       tower_name_scope = 'ClassPredictionTower'
     else:
-      raise ValueError('Unknown head')
+      tower_name_scope = head_name + 'PredictionTower'
     if self._share_prediction_tower:
       head_tower_feature = box_tower_feature
     else:
       head_tower_feature = self._compute_base_tower(
           tower_name_scope=tower_name_scope,
           image_feature=image_feature,
-          feature_index=feature_index,
-          has_different_feature_channels=has_different_feature_channels,
-          target_channel=target_channel,
-          inserted_layer_counter=inserted_layer_counter)
+          feature_index=feature_index)
     return head_obj.predict(
         features=head_tower_feature,
         num_predictions_per_location=num_predictions_per_location)
@@ -341,13 +336,13 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
         [batch_size, num_anchors_i, num_classes + 1] representing the class
         predictions for the proposals. Each entry in the list corresponds to a
         feature map in the input `image_features` list.
-      (optional) mask_predictions: A list of float tensors of shape
+      (optional) Predictions from other heads.
+        E.g., mask_predictions: A list of float tensors of shape
         [batch_size, num_anchord_i, num_classes, mask_height, mask_width].

     Raises:
-      ValueError: If the image feature maps do not have the same number of
-        channels or if the num predictions per locations is differs between the
+      ValueError: If the num predictions per locations differs between the
         feature maps.
     """
     if len(set(num_predictions_per_location_list)) > 1:
@@ -392,10 +387,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
           box_tower_feature = self._compute_base_tower(
               tower_name_scope=box_tower_scope,
               image_feature=image_feature,
-              feature_index=feature_index,
-              has_different_feature_channels=has_different_feature_channels,
-              target_channel=target_channel,
-              inserted_layer_counter=inserted_layer_counter)
+              feature_index=feature_index)
         box_encodings = self._box_prediction_head.predict(
             features=box_tower_feature,
             num_predictions_per_location=num_predictions_per_location)
@@ -413,9 +405,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
             image_feature=image_feature,
             box_tower_feature=box_tower_feature,
             feature_index=feature_index,
-            has_different_feature_channels=has_different_feature_channels,
-            target_channel=target_channel,
-            inserted_layer_counter=inserted_layer_counter,
             num_predictions_per_location=num_predictions_per_location)
         predictions[head_name].append(prediction)
     return predictions
@@ -14,6 +14,8 @@
 # ==============================================================================

 """Tests for object_detection.predictors.convolutional_box_predictor."""

+from absl.testing import parameterized
+
 import numpy as np
 import tensorflow as tf

@@ -21,6 +23,9 @@ from google.protobuf import text_format
 from object_detection.builders import box_predictor_builder
 from object_detection.builders import hyperparams_builder
 from object_detection.predictors import convolutional_box_predictor as box_predictor
+from object_detection.predictors.heads import box_head
+from object_detection.predictors.heads import class_head
+from object_detection.predictors.heads import mask_head
 from object_detection.protos import hyperparams_pb2
 from object_detection.utils import test_case

@@ -852,5 +857,66 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     self.assertAllEqual(objectness_predictions_shape,
                         [4, expected_num_anchors, 1])
def test_other_heads_predictions(self):
box_code_size = 4
num_classes_without_background = 3
other_head_name = 'Mask'
mask_height = 5
mask_width = 5
num_predictions_per_location = 5
def graph_fn(image_features):
box_prediction_head = box_head.WeightSharedConvolutionalBoxHead(
box_code_size)
class_prediction_head = class_head.WeightSharedConvolutionalClassHead(
num_classes_without_background + 1)
other_heads = {
other_head_name:
mask_head.WeightSharedConvolutionalMaskHead(
num_classes_without_background,
mask_height=mask_height,
mask_width=mask_width)
}
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
box_prediction_head=box_prediction_head,
class_prediction_head=class_prediction_head,
other_heads=other_heads,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2)
box_predictions = conv_box_predictor.predict(
[image_features],
num_predictions_per_location=[num_predictions_per_location],
scope='BoxPredictor')
for key, value in box_predictions.items():
box_predictions[key] = tf.concat(value, axis=1)
assert len(box_predictions) == 3
return (box_predictions[box_predictor.BOX_ENCODINGS],
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
box_predictions[other_head_name])
batch_size = 4
feature_ht = 8
feature_wt = 8
image_features = np.random.rand(batch_size, feature_ht, feature_wt,
64).astype(np.float32)
(box_encodings, class_predictions, other_head_predictions) = self.execute(
graph_fn, [image_features])
num_anchors = feature_ht * feature_wt * num_predictions_per_location
self.assertAllEqual(box_encodings.shape,
[batch_size, num_anchors, box_code_size])
self.assertAllEqual(
class_predictions.shape,
[batch_size, num_anchors, num_classes_without_background + 1])
self.assertAllEqual(other_head_predictions.shape, [
batch_size, num_anchors, num_classes_without_background, mask_height,
mask_width
])
 if __name__ == '__main__':
   tf.test.main()
@@ -191,7 +191,69 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
     self.assertEqual(conv_box_predictor._sorted_head_names,
                      ['box_encodings', 'class_predictions_with_background'])

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_use_depthwise_convolution(self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = (
box_predictor_builder.build_convolutional_keras_box_predictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
num_predictions_per_location_list=[5],
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4,
use_depthwise=True
))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel'])
self.assertEqual(expected_variable_set, actual_variable_set)
self.assertEqual(conv_box_predictor._sorted_head_names,
['box_encodings', 'class_predictions_with_background'])
 if __name__ == '__main__':
   tf.test.main()
@@ -56,7 +56,20 @@ class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
     box_encodings = box_prediction_head(image_feature)
     self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
box_prediction_head = keras_box_head.ConvolutionalBoxHead(
is_training=True,
box_code_size=4,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
box_encodings = box_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
 if __name__ == '__main__':
   tf.test.main()
@@ -59,7 +59,23 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
     self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
class_prediction_head = keras_class_head.ConvolutionalClassHead(
is_training=True,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head(image_feature,)
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
 if __name__ == '__main__':
   tf.test.main()
@@ -61,7 +61,25 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
     self.assertAllEqual([64, 323, 20, 7, 7],
                         mask_predictions.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 20, 7, 7],
mask_predictions.get_shape().as_list())
   def test_class_agnostic_prediction_size_use_depthwise_false(self):
     conv_hyperparams = self._build_conv_hyperparams()
@@ -84,7 +102,26 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
     self.assertAllEqual([64, 323, 1, 7, 7],
                         mask_predictions.get_shape().as_list())

-  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
+  def test_class_agnostic_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 7, 7],
mask_predictions.get_shape().as_list())
 if __name__ == '__main__':
   tf.test.main()
syntax = "proto2";
package object_detection.protos;
// Message wrapper for various calibration configurations
message CalibrationConfig {
oneof calibrator {
// Class-agnostic calibration via linear interpolation (usually output from
// isotonic regression)
FunctionApproximation function_approximation = 1;
// Per-class calibration via linear interpolation
LabelFunctionApproximations label_function_approximations = 2;
// Class-agnostic sigmoid calibration
SigmoidCalibration sigmoid_calibration = 3;
// Per-class sigmoid calibration
LabelSigmoidCalibrations label_sigmoid_calibrations = 4;
}
}
// Message for class-agnostic domain/range mapping for function
// approximations
message FunctionApproximation {
// Message mapping class labels to indices
optional XYPairs x_y_pairs = 1;
}
// Message for class-specific domain/range mapping for function
// approximations
message LabelFunctionApproximations {
// Message mapping class labels to indices
map<string, XYPairs> label_xy_pairs_map = 1;
// Label map to map label names from to class ids.
optional string label_map_path = 2;
}
// Message for class-agnostic Sigmoid Calibration
message SigmoidCalibration {
// Message mapping class index to Sigmoid Parameters
optional SigmoidParameters sigmoid_parameters = 1;
}
// Message for class-specific Sigmoid Calibration
message LabelSigmoidCalibrations {
// Message mapping class index to Sigmoid Parameters
map<string, SigmoidParameters> label_sigmoid_parameters_map = 1;
// Label map to map label names from to class ids.
optional string label_map_path = 2;
}
// Message to store a domain/range pair for function to be approximated
message XYPairs {
message XYPair {
optional float x = 1;
optional float y = 2;
}
// Sequence of x/y pairs for function approximation
repeated XYPair x_y_pair = 1;
}
// Message defining parameters for sigmoid calibration.
message SigmoidParameters {
optional float a = 1 [default = -1.0];
optional float b = 2 [default = 0.0];
}
@@ -8,6 +8,7 @@ message ImageResizer {
   oneof image_resizer_oneof {
     KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
     FixedShapeResizer fixed_shape_resizer = 2;
+    IdentityResizer identity_resizer = 3;
   }
 }
@@ -19,6 +20,9 @@ enum ResizeType {
   AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA
 }

+message IdentityResizer {
+}
+
 // Configuration proto for image resizer that keeps aspect ratio.
 message KeepAspectRatioResizer {
   // Desired size of the smaller image dimension in pixels.
......
@@ -22,7 +22,7 @@ enum InstanceMaskType {
   PNG_MASKS = 2; // Encoded PNG masks.
 }

-// Next id: 24
+// Next id: 25
 message InputReader {
   // Name of input reader. Typically used to describe the dataset that is read
   // by this input reader.
@@ -94,6 +94,9 @@ message InputReader {
   // otherwise some groundtruth boxes may be clipped.
   optional int32 max_number_of_boxes = 21 [default=100];

+  // Whether to load multiclass scores from the dataset.
+  optional bool load_multiclass_scores = 24 [default = false];
+
   // Whether to load groundtruth instance masks.
   optional bool load_instance_masks = 7 [default = false];
......
@@ -38,6 +38,7 @@ message AdamOptimizer {
   optional LearningRate learning_rate = 1;
 }

 // Configuration message for optimizer learning rate.
 message LearningRate {
   oneof learning_rate {
......
@@ -2,6 +2,8 @@ syntax = "proto2";

 package object_detection.protos;

+import "object_detection/protos/calibration.proto";
+
 // Configuration proto for non-max-suppression operation on a batch of
 // detections.
 message BatchNonMaxSuppression {
@@ -46,4 +48,7 @@ message PostProcessing {
   // Typically used for softmax distillation, though can be used to scale for
   // other reasons.
   optional float logit_scale = 3 [default = 1.0];
+
+  // Calibrate score outputs. Calibration is applied after score converter
+  // and before non max suppression.
+  optional CalibrationConfig calibration_config = 4;
 }
@@ -34,6 +34,8 @@ message PreprocessingStep {
     RandomRotation90 random_rotation90 = 26;
     RGBtoGray rgb_to_gray = 27;
     ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
+    RandomAbsolutePadImage random_absolute_pad_image = 29;
+    RandomSelfConcatImage random_self_concat_image = 30;
   }
 }
@@ -179,6 +181,18 @@ message RandomPadImage {
   repeated float pad_color = 5;
 }

+// Randomly adds a padding of size [0, max_height_padding), [0, max_width_padding).
+message RandomAbsolutePadImage {
+  // Height will be padded uniformly at random from [0, max_height_padding).
+  optional int32 max_height_padding = 1;
+
+  // Width will be padded uniformly at random from [0, max_width_padding).
+  optional int32 max_width_padding = 2;
+
+  // Color of the padding. If unset, will pad using average color of the input
+  // image.
+  repeated float pad_color = 3;
+}
+
 // Randomly crops an image followed by a random pad.
 message RandomCropPadImage {
   // Cropping operation must cover at least one box by this fraction.
@@ -243,8 +257,8 @@ message RandomBlackPatches {

 // Randomly resizes the image up to [target_height, target_width].
 message RandomResizeMethod {
-  optional float target_height = 1;
-  optional float target_width = 2;
+  optional int32 target_height = 1;
+  optional int32 target_width = 2;
 }

 // Converts the RGB image to a grayscale image. This also converts the image
@@ -439,3 +453,11 @@ message ConvertClassLogitsToSoftmax {
   // Scale to use on logits before applying softmax.
   optional float temperature = 1 [default=1.0];
 }
+
+// Randomly concatenates the image with itself horizontally and/or vertically.
+message RandomSelfConcatImage {
+  // Probability of concatenating the image vertically.
+  optional float concat_vertical_probability = 1 [default = 0.1];
+
+  // Probability of concatenating the image horizontally.
+  optional float concat_horizontal_probability = 2 [default = 0.1];
+}
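
For illustration, the two new steps as they might appear in a training config
(the data_augmentation_options wrapper comes from train.proto; the values here
are made up):

data_augmentation_options {
  random_absolute_pad_image {
    max_height_padding: 50
    max_width_padding: 50
  }
}
data_augmentation_options {
  random_self_concat_image {
    concat_vertical_probability: 0.2
    concat_horizontal_probability: 0.2
  }
}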