Commit 05584085 authored by pkulzc, committed by Jonathan Huang

Merged commit includes the following changes (#6315):

236813471  by lzc:

    Internal change.

--
236507310  by lzc:

    Fix preprocess.random_resize_method config type issue: the target height and width are passed as "size" to tf.image.resize_images, which only accepts integers.
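
    A minimal sketch of the constraint (the 512x512 target size is
    illustrative): tf.image.resize_images converts its "size" argument to
    an int32 tensor, so float values from the old proto fields fail.

        import tensorflow as tf

        images = tf.placeholder(tf.float32, [None, None, None, 3])
        # "size" must be convertible to a 1-D int32 tensor; floats such
        # as [512.0, 512.0] (the old float proto fields) raise an error.
        resized = tf.image.resize_images(
            images, size=[512, 512],
            method=tf.image.ResizeMethod.BILINEAR)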

--
236409989  by Zhichao Lu:

    Config export_to_tpu from function parameter instead of HParams for TPU inference.

--
236403186  by Zhichao Lu:

    Make graph file names optional arguments.

--
236237072  by Zhichao Lu:

    Minor bugfix for keyword args.

--
236209602  by Zhichao Lu:

    Add support for PartitionedVariable to get_variables_available_in_checkpoint.
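
    A hedged usage sketch (the checkpoint path is hypothetical; the
    function lives in object_detection/utils/variables_helper.py):

        import tensorflow as tf
        from object_detection.utils import variables_helper

        # The variable list may now include tf.PartitionedVariable
        # instances, e.g. variables created with a partitioner.
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        available = variables_helper.get_variables_available_in_checkpoint(
            all_vars, '/path/to/model.ckpt')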

--
235828658  by Zhichao Lu:

    Automatically stop evaluation jobs when training is finished.

--
235817964  by Zhichao Lu:

    Add an optional process_metrics_fn callback to eval_util; it is called
    with the evaluation results once each evaluation is complete.
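
    The callback's exact signature is not shown in this diff; a plausible
    sketch, assuming it receives a dict mapping metric names to scalars:

        import tensorflow as tf

        def log_eval_metrics(metrics):
          # Hypothetical process_metrics_fn: log each metric once an
          # evaluation round completes.
          for name, value in metrics.items():
            tf.logging.info('eval metric %s = %f', name, value)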

--
235788721  by lzc:

    Fix yml file tf runtime...
parent a5db4420
@@ -29,7 +29,7 @@ from object_detection.utils import test_case
class SsdFeatureExtractorTestBase(test_case.TestCase):
def _build_conv_hyperparams(self):
def _build_conv_hyperparams(self, add_batch_norm=True):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
@@ -41,10 +41,14 @@ class SsdFeatureExtractorTestBase(test_case.TestCase):
truncated_normal_initializer {
}
}
batch_norm {
scale: false
}
"""
if add_batch_norm:
batch_norm_proto = """
batch_norm {
scale: false
}
"""
conv_hyperparams_text_proto += batch_norm_proto
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
@@ -13,41 +13,69 @@
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v1_feature_extractor."""
"""Tests for SSD Mobilenet V1 feature extractors.
By using the parameterized test decorator, this test serves both the
Slim-based and the Keras-based Mobilenet V1 feature extractors in SSD.
"""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_mobilenet_v1_feature_extractor
from object_detection.models import ssd_mobilenet_v1_keras_feature_extractor
slim = tf.contrib.slim
@parameterized.parameters(
{'use_keras': False},
{'use_keras': True},
)
class SsdMobilenetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, use_explicit_padding=False):
use_explicit_padding=False, is_training=False,
use_keras=False):
"""Constructs a new feature extractor.
Args:
depth_multiplier: float depth multiplier for feature extractor
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
is_training: whether the network is in training mode.
use_keras: if True builds a keras-based feature extractor, if False builds
a slim-based one.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self):
if use_keras:
return (ssd_mobilenet_v1_keras_feature_extractor.
SSDMobileNetV1KerasFeatureExtractor(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=self._build_conv_hyperparams(
add_batch_norm=False),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
use_explicit_padding=use_explicit_padding,
name='MobilenetV1'))
else:
return ssd_mobilenet_v1_feature_extractor.SSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
self.conv_hyperparams_fn,
use_explicit_padding=use_explicit_padding)
def test_extract_features_returns_correct_shapes_128(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -57,12 +85,14 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_299(self):
def test_extract_features_returns_correct_shapes_299(self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -72,12 +102,14 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_with_dynamic_image_shape(self):
def test_extract_features_with_dynamic_image_shape(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1.0
@@ -87,12 +119,15 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 1, 1, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
def test_extract_features_returns_correct_shapes_enforcing_min_depth(
self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 0.5**12
@@ -102,12 +137,15 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 2, 2, 32), (2, 1, 1, 32)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(
self, use_keras):
image_height = 299
image_width = 299
depth_multiplier = 1.0
@@ -117,48 +155,63 @@ class SsdMobilenetV1FeatureExtractorTest(
(2, 2, 2, 256), (2, 1, 1, 128)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=False)
expected_feature_map_shape, use_explicit_padding=False,
use_keras=use_keras)
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape, use_explicit_padding=True)
expected_feature_map_shape, use_explicit_padding=True,
use_keras=use_keras)
def test_extract_features_raises_error_with_invalid_image_size(self):
def test_extract_features_raises_error_with_invalid_image_size(
self, use_keras):
image_height = 32
image_width = 32
depth_multiplier = 1.0
pad_to_multiple = 1
self.check_extract_features_raises_error_with_invalid_image_size(
image_height, image_width, depth_multiplier, pad_to_multiple)
image_height, image_width, depth_multiplier, pad_to_multiple,
use_keras=use_keras)
def test_preprocess_returns_correct_value_range(self):
def test_preprocess_returns_correct_value_range(self, use_keras):
image_height = 128
image_width = 128
depth_multiplier = 1
pad_to_multiple = 1
test_image = np.random.rand(2, image_height, image_width, 3)
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
pad_to_multiple,
use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(test_image)
self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
def test_variables_only_created_in_scope(self):
def test_variables_only_created_in_scope(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
scope_name = 'MobilenetV1'
self.check_feature_extractor_variables_under_scope(
depth_multiplier, pad_to_multiple, scope_name)
depth_multiplier, pad_to_multiple, scope_name, use_keras=use_keras)
def test_has_fused_batchnorm(self):
def test_variable_count(self, use_keras):
depth_multiplier = 1
pad_to_multiple = 1
variables = self.get_feature_extractor_variables(
depth_multiplier, pad_to_multiple, use_keras=use_keras)
self.assertEqual(len(variables), 151)
def test_has_fused_batchnorm(self, use_keras):
image_height = 40
image_width = 40
depth_multiplier = 1
pad_to_multiple = 1
image_placeholder = tf.placeholder(tf.float32,
[1, image_height, image_width, 3])
feature_extractor = self._create_feature_extractor(depth_multiplier,
pad_to_multiple)
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, use_keras=use_keras)
preprocessed_image = feature_extractor.preprocess(image_placeholder)
_ = feature_extractor.extract_features(preprocessed_image)
if use_keras:
_ = feature_extractor(preprocessed_image)
else:
_ = feature_extractor.extract_features(preprocessed_image)
self.assertTrue(any(op.type == 'FusedBatchNorm'
for op in tf.get_default_graph().get_operations()))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for Keras MobilenetV1 features."""
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models.keras_models import mobilenet_v1
from object_detection.utils import ops
from object_detection.utils import shape_utils
slim = tf.contrib.slim
class SSDMobileNetV1KerasFeatureExtractor(
ssd_meta_arch.SSDKerasFeatureExtractor):
"""SSD Feature Extractor using Keras MobilenetV1 features."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
freeze_batchnorm,
inplace_batchnorm_update,
use_explicit_padding=False,
use_depthwise=False,
override_base_feature_extractor_hyperparams=False,
name=None):
"""Keras MobileNetV1 Feature Extractor for SSD Models.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false, the train op must add a control
dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
batch norm statistics.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_depthwise: Whether to use depthwise convolutions. Default is False.
override_base_feature_extractor_hyperparams: Whether to override
hyperparameters of the base feature extractor with the one from
`conv_hyperparams`.
name: A string name scope to assign to the model. If 'None', Keras
will auto-generate one from the class name.
"""
super(SSDMobileNetV1KerasFeatureExtractor, self).__init__(
is_training=is_training,
depth_multiplier=depth_multiplier,
min_depth=min_depth,
pad_to_multiple=pad_to_multiple,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=freeze_batchnorm,
inplace_batchnorm_update=inplace_batchnorm_update,
use_explicit_padding=use_explicit_padding,
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams,
name=name)
self._feature_map_layout = {
'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
'', ''],
'layer_depth': [-1, -1, 512, 256, 256, 128],
'use_explicit_padding': self._use_explicit_padding,
'use_depthwise': self._use_depthwise,
}
self._mobilenet_v1 = None
self._feature_map_generator = None
def build(self, input_shape):
full_mobilenet_v1 = mobilenet_v1.mobilenet_v1(
batchnorm_training=(self._is_training and not self._freeze_batchnorm),
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
else None),
weights=None,
use_explicit_padding=self._use_explicit_padding,
alpha=self._depth_multiplier,
min_depth=self._min_depth,
include_top=False)
conv2d_11_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_11_relu').output
conv2d_13_pointwise = full_mobilenet_v1.get_layer(
name='conv_pw_13_relu').output
self._mobilenet_v1 = tf.keras.Model(
inputs=full_mobilenet_v1.inputs,
outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
self._feature_map_generator = (
feature_map_generators.KerasMultiResolutionFeatureMaps(
feature_map_layout=self._feature_map_layout,
depth_multiplier=self._depth_multiplier,
min_depth=self._min_depth,
insert_1x1_conv=True,
is_training=self._is_training,
conv_hyperparams=self._conv_hyperparams,
freeze_batchnorm=self._freeze_batchnorm,
name='FeatureMaps'))
self.built = True
def preprocess(self, resized_inputs):
"""SSD preprocessing.
Maps pixel values to the range [-1, 1].
Args:
resized_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
"""
return (2.0 / 255.0) * resized_inputs - 1.0
def _extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
preprocessed_inputs = shape_utils.check_min_image_dim(
33, preprocessed_inputs)
image_features = self._mobilenet_v1(
ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
feature_maps = self._feature_map_generator({
'Conv2d_11_pointwise': image_features[0],
'Conv2d_13_pointwise': image_features[1]})
return feature_maps.values()
@@ -19,7 +19,7 @@ import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.models.keras_applications import mobilenet_v2
from object_detection.models.keras_models import mobilenet_v2
from object_detection.utils import ops
from object_detection.utils import shape_utils
@@ -53,8 +53,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -96,9 +95,6 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
use_depthwise=use_depthwise,
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
if self._depth_multiplier != 1.0:
raise ValueError('Only depth 1.0 is supported, found: {}'.
format(self._depth_multiplier))
if self._use_explicit_padding is True:
raise ValueError('Explicit padding is not a valid option.')
self._resnet_base_fn = resnet_base_fn
@@ -150,13 +146,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
Raises:
ValueError: depth multiplier is not supported.
"""
if self._depth_multiplier != 1.0:
raise ValueError('Depth multiplier not supported.')
preprocessed_inputs = shape_utils.check_min_image_dim(
129, preprocessed_inputs)
@@ -174,8 +164,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
global_pool=False,
output_stride=None,
store_non_strided_activations=True,
min_base_depth=self._min_depth,
depth_multiplier=self._depth_multiplier,
scope=scope)
image_features = self._filter_features(image_features)
depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
with slim.arg_scope(self._conv_hyperparams_fn()):
with tf.variable_scope(self._fpn_scope_name,
reuse=self._reuse_weights):
@@ -185,7 +178,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_block_list.append('block{}'.format(level - 1))
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
[(key, image_features[key]) for key in feature_block_list],
depth=self._additional_layer_depth)
depth=depth_fn(self._additional_layer_depth))
feature_maps = []
for level in range(self._fpn_min_level, base_fpn_max_level + 1):
feature_maps.append(
@@ -196,7 +189,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
for i in range(base_fpn_max_level, self._fpn_max_level):
last_feature_map = slim.conv2d(
last_feature_map,
num_outputs=self._additional_layer_depth,
num_outputs=depth_fn(self._additional_layer_depth),
kernel_size=[3, 3],
stride=2,
padding='SAME',
@@ -226,8 +219,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -284,8 +276,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -342,8 +333,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
UNUSED currently.
min_depth: minimum feature extractor depth. UNUSED Currently.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
@@ -25,8 +25,7 @@ class SSDResnet50V1FeatureExtractorTest(
"""SSDResnet50v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
use_explicit_padding=False, min_depth=32):
is_training = True
return ssd_resnet_v1_fpn_feature_extractor.SSDResnet50V1FpnFeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
@@ -42,8 +41,7 @@ class SSDResnet101V1FeatureExtractorTest(
"""SSDResnet101v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
use_explicit_padding=False, min_depth=32):
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet101V1FpnFeatureExtractor(
@@ -64,8 +62,7 @@ class SSDResnet152V1FeatureExtractorTest(
"""SSDResnet152v1Fpn feature extractor test."""
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
use_explicit_padding=False):
min_depth = 32
use_explicit_padding=False, min_depth=32):
is_training = True
return (
ssd_resnet_v1_fpn_feature_extractor.SSDResnet152V1FpnFeatureExtractor(
@@ -14,6 +14,7 @@
# ==============================================================================
"""Tests for ssd resnet v1 FPN feature extractors."""
import abc
import itertools
import numpy as np
import tensorflow as tf
@@ -32,6 +33,14 @@ class SSDResnetFPNFeatureExtractorTestBase(
def _fpn_scope_name(self):
return 'fpn'
@abc.abstractmethod
def _create_feature_extractor(self,
depth_multiplier,
pad_to_multiple,
use_explicit_padding=False,
min_depth=32):
pass
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
image_width = 256
@@ -56,6 +65,45 @@ class SSDResnetFPNFeatureExtractorTestBase(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_depth_multiplier(self):
image_height = 256
image_width = 256
depth_multiplier = 0.5
expected_num_channels = int(256 * depth_multiplier)
pad_to_multiple = 1
expected_feature_map_shape = [(2, 32, 32, expected_num_channels),
(2, 16, 16, expected_num_channels),
(2, 8, 8, expected_num_channels),
(2, 4, 4, expected_num_channels),
(2, 2, 2, expected_num_channels)]
self.check_extract_features_returns_correct_shape(
2, image_height, image_width, depth_multiplier, pad_to_multiple,
expected_feature_map_shape)
def test_extract_features_returns_correct_shapes_with_min_depth(self):
image_height = 256
image_width = 256
depth_multiplier = 1.0
pad_to_multiple = 1
min_depth = 320
expected_feature_map_shape = [(2, 32, 32, min_depth),
(2, 16, 16, min_depth),
(2, 8, 8, min_depth),
(2, 4, 4, min_depth),
(2, 2, 2, min_depth)]
def graph_fn(image_tensor):
feature_extractor = self._create_feature_extractor(
depth_multiplier, pad_to_multiple, min_depth=min_depth)
return feature_extractor.extract_features(image_tensor)
image_tensor = np.random.rand(2, image_height, image_width,
3).astype(np.float32)
feature_maps = self.execute(graph_fn, [image_tensor])
for feature_map, expected_shape in itertools.izip(
feature_maps, expected_feature_map_shape):
self.assertAllEqual(feature_map.shape, expected_shape)
def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
image_height = 254
image_width = 254
@@ -54,8 +54,8 @@
"sys.path.append(\"..\")\n",
"from object_detection.utils import ops as utils_ops\n",
"\n",
"if StrictVersion(tf.__version__) \u003c StrictVersion('1.9.0'):\n",
" raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')\n"
"if StrictVersion(tf.__version__) \u003c StrictVersion('1.12.0'):\n",
" raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n"
]
},
{
@@ -108,14 +108,16 @@ class ConvolutionalBoxPredictor(box_predictor.BoxPredictor):
feature map.
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
A dictionary containing:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
(optional) Predictions from other heads.
"""
predictions = {
BOX_ENCODINGS: [],
@@ -226,8 +228,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
kernel_size: Size of final convolution kernel.
apply_batch_norm: Whether to apply batch normalization to conv layers in
this predictor.
share_prediction_tower: Whether to share the multi-layer tower between box
prediction and class prediction heads.
share_prediction_tower: Whether to share the multi-layer tower among box
prediction head, class prediction head and other heads.
use_depthwise: Whether to use depthwise separable conv2d instead of
regular conv2d.
"""
@@ -270,9 +272,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
inserted_layer_counter += 1
return image_feature, inserted_layer_counter
def _compute_base_tower(self, tower_name_scope, image_feature, feature_index,
has_different_feature_channels, target_channel,
inserted_layer_counter):
def _compute_base_tower(self, tower_name_scope, image_feature, feature_index):
net = image_feature
for i in range(self._num_layers_before_predictor):
if self._use_depthwise:
@@ -296,23 +296,18 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
return net
def _predict_head(self, head_name, head_obj, image_feature, box_tower_feature,
feature_index, has_different_feature_channels,
target_channel, inserted_layer_counter,
num_predictions_per_location):
feature_index, num_predictions_per_location):
if head_name == CLASS_PREDICTIONS_WITH_BACKGROUND:
tower_name_scope = 'ClassPredictionTower'
else:
raise ValueError('Unknown head')
tower_name_scope = head_name + 'PredictionTower'
if self._share_prediction_tower:
head_tower_feature = box_tower_feature
else:
head_tower_feature = self._compute_base_tower(
tower_name_scope=tower_name_scope,
image_feature=image_feature,
feature_index=feature_index,
has_different_feature_channels=has_different_feature_channels,
target_channel=target_channel,
inserted_layer_counter=inserted_layer_counter)
feature_index=feature_index)
return head_obj.predict(
features=head_tower_feature,
num_predictions_per_location=num_predictions_per_location)
@@ -341,13 +336,13 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
(optional) mask_predictions: A list of float tensors of shape
(optional) Predictions from other heads.
E.g., mask_predictions: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes, mask_height, mask_width].
Raises:
ValueError: If the image feature maps do not have the same number of
channels or if the num predictions per locations is differs between the
ValueError: If the number of predictions per location differs between the
feature maps.
"""
if len(set(num_predictions_per_location_list)) > 1:
@@ -392,10 +387,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
box_tower_feature = self._compute_base_tower(
tower_name_scope=box_tower_scope,
image_feature=image_feature,
feature_index=feature_index,
has_different_feature_channels=has_different_feature_channels,
target_channel=target_channel,
inserted_layer_counter=inserted_layer_counter)
feature_index=feature_index)
box_encodings = self._box_prediction_head.predict(
features=box_tower_feature,
num_predictions_per_location=num_predictions_per_location)
@@ -413,9 +405,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
image_feature=image_feature,
box_tower_feature=box_tower_feature,
feature_index=feature_index,
has_different_feature_channels=has_different_feature_channels,
target_channel=target_channel,
inserted_layer_counter=inserted_layer_counter,
num_predictions_per_location=num_predictions_per_location)
predictions[head_name].append(prediction)
return predictions
@@ -14,6 +14,8 @@
# ==============================================================================
"""Tests for object_detection.predictors.convolutional_box_predictor."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
@@ -21,6 +23,9 @@ from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.predictors import convolutional_box_predictor as box_predictor
from object_detection.predictors.heads import box_head
from object_detection.predictors.heads import class_head
from object_detection.predictors.heads import mask_head
from object_detection.protos import hyperparams_pb2
from object_detection.utils import test_case
@@ -852,5 +857,66 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
def test_other_heads_predictions(self):
box_code_size = 4
num_classes_without_background = 3
other_head_name = 'Mask'
mask_height = 5
mask_width = 5
num_predictions_per_location = 5
def graph_fn(image_features):
box_prediction_head = box_head.WeightSharedConvolutionalBoxHead(
box_code_size)
class_prediction_head = class_head.WeightSharedConvolutionalClassHead(
num_classes_without_background + 1)
other_heads = {
other_head_name:
mask_head.WeightSharedConvolutionalMaskHead(
num_classes_without_background,
mask_height=mask_height,
mask_width=mask_width)
}
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
box_prediction_head=box_prediction_head,
class_prediction_head=class_prediction_head,
other_heads=other_heads,
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2)
box_predictions = conv_box_predictor.predict(
[image_features],
num_predictions_per_location=[num_predictions_per_location],
scope='BoxPredictor')
for key, value in box_predictions.items():
box_predictions[key] = tf.concat(value, axis=1)
assert len(box_predictions) == 3
return (box_predictions[box_predictor.BOX_ENCODINGS],
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
box_predictions[other_head_name])
batch_size = 4
feature_ht = 8
feature_wt = 8
image_features = np.random.rand(batch_size, feature_ht, feature_wt,
64).astype(np.float32)
(box_encodings, class_predictions, other_head_predictions) = self.execute(
graph_fn, [image_features])
num_anchors = feature_ht * feature_wt * num_predictions_per_location
self.assertAllEqual(box_encodings.shape,
[batch_size, num_anchors, box_code_size])
self.assertAllEqual(
class_predictions.shape,
[batch_size, num_anchors, num_classes_without_background + 1])
self.assertAllEqual(other_head_predictions.shape, [
batch_size, num_anchors, num_classes_without_background, mask_height,
mask_width
])
if __name__ == '__main__':
tf.test.main()
@@ -191,7 +191,69 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
self.assertEqual(conv_box_predictor._sorted_head_names,
['box_encodings', 'class_predictions_with_background'])
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_use_depthwise_convolution(self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = (
box_predictor_builder.build_convolutional_keras_box_predictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_conv_hyperparams(),
freeze_batchnorm=False,
inplace_batchnorm_update=False,
num_predictions_per_location_list=[5],
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4,
use_depthwise=True
))
box_predictions = conv_box_predictor([image_features])
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
expected_variable_set = set([
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor_depthwise/'
'depthwise_kernel',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel'])
self.assertEqual(expected_variable_set, actual_variable_set)
self.assertEqual(conv_box_predictor._sorted_head_names,
['box_encodings', 'class_predictions_with_background'])
if __name__ == '__main__':
tf.test.main()
@@ -56,7 +56,20 @@ class ConvolutionalKerasBoxHeadTest(test_case.TestCase):
box_encodings = box_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
box_prediction_head = keras_box_head.ConvolutionalBoxHead(
is_training=True,
box_code_size=4,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
box_encodings = box_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 4], box_encodings.get_shape().as_list())
if __name__ == '__main__':
tf.test.main()
@@ -59,7 +59,23 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_prediction_size_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
class_prediction_head = keras_class_head.ConvolutionalClassHead(
is_training=True,
num_class_slots=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
class_predictions = class_prediction_head(image_feature)
self.assertAllEqual([64, 323, 20],
class_predictions.get_shape().as_list())
if __name__ == '__main__':
tf.test.main()
@@ -61,7 +61,25 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 20, 7, 7],
mask_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 20, 7, 7],
mask_predictions.get_shape().as_list())
def test_class_agnostic_prediction_size_use_depthwise_false(self):
conv_hyperparams = self._build_conv_hyperparams()
@@ -84,7 +102,26 @@ class ConvolutionalMaskPredictorTest(test_case.TestCase):
self.assertAllEqual([64, 323, 1, 7, 7],
mask_predictions.get_shape().as_list())
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
def test_class_agnostic_prediction_size_use_depthwise_true(self):
conv_hyperparams = self._build_conv_hyperparams()
mask_prediction_head = keras_mask_head.ConvolutionalMaskHead(
is_training=True,
num_classes=20,
use_dropout=True,
dropout_keep_prob=0.5,
kernel_size=3,
conv_hyperparams=conv_hyperparams,
freeze_batchnorm=False,
num_predictions_per_location=1,
use_depthwise=True,
mask_height=7,
mask_width=7,
masks_are_class_agnostic=True)
image_feature = tf.random_uniform(
[64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
mask_predictions = mask_prediction_head(image_feature)
self.assertAllEqual([64, 323, 1, 7, 7],
mask_predictions.get_shape().as_list())
if __name__ == '__main__':
tf.test.main()
syntax = "proto2";
package object_detection.protos;
// Message wrapper for various calibration configurations
message CalibrationConfig {
oneof calibrator {
// Class-agnostic calibration via linear interpolation (usually output from
// isotonic regression)
FunctionApproximation function_approximation = 1;
// Per-class calibration via linear interpolation
LabelFunctionApproximations label_function_approximations = 2;
// Class-agnostic sigmoid calibration
SigmoidCalibration sigmoid_calibration = 3;
// Per-class sigmoid calibration
LabelSigmoidCalibrations label_sigmoid_calibrations = 4;
}
}
// Message for class-agnostic domain/range mapping for function
// approximations
message FunctionApproximation {
// Message mapping class labels to indices
optional XYPairs x_y_pairs = 1;
}
// Message for class-specific domain/range mapping for function
// approximations
message LabelFunctionApproximations {
// Message mapping class labels to indices
map<string, XYPairs> label_xy_pairs_map = 1;
// Label map used to map label names to class ids.
optional string label_map_path = 2;
}
// Message for class-agnostic Sigmoid Calibration
message SigmoidCalibration {
// Message mapping class index to Sigmoid Parameters
optional SigmoidParameters sigmoid_parameters = 1;
}
// Message for class-specific Sigmoid Calibration
message LabelSigmoidCalibrations {
// Message mapping class index to Sigmoid Parameters
map<string, SigmoidParameters> label_sigmoid_parameters_map = 1;
// Label map to map label names from to class ids.
optional string label_map_path = 2;
}
// Message to store a domain/range pair for function to be approximated
message XYPairs {
message XYPair {
optional float x = 1;
optional float y = 2;
}
// Sequence of x/y pairs for function approximation
repeated XYPair x_y_pair = 1;
}
// Message defining parameters for sigmoid calibration.
message SigmoidParameters {
optional float a = 1 [default = -1.0];
optional float b = 2 [default = 0.0];
}
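
For illustration only (field names follow the messages above; the parameter
values are placeholders), a class-agnostic sigmoid calibration config can be
parsed with the generated Python bindings:

    from google.protobuf import text_format
    from object_detection.protos import calibration_pb2

    config = calibration_pb2.CalibrationConfig()
    # Select the class-agnostic sigmoid calibrator from the oneof.
    text_format.Merge("""
      sigmoid_calibration {
        sigmoid_parameters {
          a: -1.0
          b: 0.0
        }
      }
    """, config)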
@@ -8,6 +8,7 @@ message ImageResizer {
oneof image_resizer_oneof {
KeepAspectRatioResizer keep_aspect_ratio_resizer = 1;
FixedShapeResizer fixed_shape_resizer = 2;
IdentityResizer identity_resizer = 3;
}
}
@@ -19,6 +20,9 @@ enum ResizeType {
AREA = 3; // Corresponds to tf.image.ResizeMethod.AREA
}
message IdentityResizer {
}
// Configuration proto for image resizer that keeps aspect ratio.
message KeepAspectRatioResizer {
// Desired size of the smaller image dimension in pixels.
@@ -22,7 +22,7 @@ enum InstanceMaskType {
PNG_MASKS = 2; // Encoded PNG masks.
}
// Next id: 24
// Next id: 25
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
@@ -94,6 +94,9 @@ message InputReader {
// otherwise some groundtruth boxes may be clipped.
optional int32 max_number_of_boxes = 21 [default=100];
// Whether to load multiclass scores from the dataset.
optional bool load_multiclass_scores = 24 [default = false];
// Whether to load groundtruth instance masks.
optional bool load_instance_masks = 7 [default = false];
@@ -38,6 +38,7 @@ message AdamOptimizer {
optional LearningRate learning_rate = 1;
}
// Configuration message for optimizer learning rate.
message LearningRate {
oneof learning_rate {
@@ -2,6 +2,8 @@ syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/calibration.proto";
// Configuration proto for non-max-suppression operation on a batch of
// detections.
message BatchNonMaxSuppression {
@@ -46,4 +48,7 @@ message PostProcessing {
// Typically used for softmax distillation, though can be used to scale for
// other reasons.
optional float logit_scale = 3 [default = 1.0];
// Calibrate score outputs. Calibration is applied after score converter
// and before non max suppression.
optional CalibrationConfig calibration_config = 4;
}
@@ -34,6 +34,8 @@ message PreprocessingStep {
RandomRotation90 random_rotation90 = 26;
RGBtoGray rgb_to_gray = 27;
ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
RandomAbsolutePadImage random_absolute_pad_image = 29;
RandomSelfConcatImage random_self_concat_image = 30;
}
}
@@ -179,6 +181,18 @@ message RandomPadImage {
repeated float pad_color = 5;
}
// Randomly adds a padding of size [0, max_height_padding), [0, max_width_padding).
message RandomAbsolutePadImage {
// Height will be padded uniformly at random from [0, max_height_padding).
optional int32 max_height_padding = 1;
// Width will be padded uniformly at random from [0, max_width_padding).
optional int32 max_width_padding = 2;
// Color of the padding. If unset, will pad using average color of the input
// image.
repeated float pad_color = 3;
}
// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
// Cropping operation must cover at least one box by this fraction.
@@ -243,8 +257,8 @@ message RandomBlackPatches {
// Randomly resizes the image up to [target_height, target_width].
message RandomResizeMethod {
optional float target_height = 1;
optional float target_width = 2;
optional int32 target_height = 1;
optional int32 target_width = 2;
}
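
This ties back to change 236507310 above: with int32 fields, a config like
the following (sizes illustrative) parses cleanly and yields integer values
for the "size" argument of tf.image.resize_images:

    from google.protobuf import text_format
    from object_detection.protos import preprocessor_pb2

    step = preprocessor_pb2.PreprocessingStep()
    text_format.Merge("""
      random_resize_method {
        target_height: 512
        target_width: 512
      }
    """, step)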
// Converts the RGB image to a grayscale image. This also converts the image
@@ -439,3 +453,11 @@ message ConvertClassLogitsToSoftmax {
// Scale to use on logits before applying softmax.
optional float temperature = 1 [default=1.0];
}
// Randomly concatenates the image with itself horizontally and/or vertically.
message RandomSelfConcatImage {
// Probability of concatenating the image vertically.
optional float concat_vertical_probability = 1 [default = 0.1];
// Probability of concatenating the image horizontally.
optional float concat_horizontal_probability = 2 [default = 0.1];
}