Minor fixes for object detection.

214018767 by Zhichao Lu:
    Add original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image.
--
213914693 by lzc:
    Internal change.
--
213872175 by Zhichao Lu:
    This CL adds a Keras-based mobilenet_v2 feature extractor for object detection models. As part of this CL, we use the Keras mobilenet_v2 application's keyword argument layer injection API to allow the generated network to support the object detection hyperparameters.
--
213848499 by Zhichao Lu:
    Replace tf.image.resize_nearest_neighbor with tf.image.resize_images. tf.image.resize_nearest_neighbor only supports 4-D tensors, but masks is a 3-D tensor.
--
213758622 by lzc:
    Internal change.
--
PiperOrigin-RevId: 214018767

Minor fixes for object detection.
214018767 by Zhichao Lu:
    Add original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image.
--
213914693 by lzc:
    Internal change.
--
213872175 by Zhichao Lu:
    This CL adds a Keras-based mobilenet_v2 feature extractor for object detection models. As part of this CL, we use the Keras mobilenet_v2 application's keyword argument layer injection API to allow the generated network to support the object detection hyperparameters.
--
213848499 by Zhichao Lu:
    Replace tf.image.resize_nearest_neighbor with tf.image.resize_images. tf.image.resize_nearest_neighbor only supports 4-D tensors, but masks is a 3-D tensor.
--
213758622 by lzc:
    Internal change.
--
PiperOrigin-RevId: 214018767
1f484095 · pkulzc · GitHub · 99256cf4 · 1f484095 · 1f484095
Unverified Commit 1f484095 authored Sep 21, 2018 by pkulzc Committed by GitHub Sep 21, 2018
20 changed files
--- a/research/object_detection/builders/hyperparams_builder.py
+++ b/research/object_detection/builders/hyperparams_builder.py
@@ -163,6 +163,10 @@ class KerasLayerHyperparams(object):
    new_params['activation'] = None
    if include_activation:
      new_params['activation'] = self._activation_fn
+    if self.use_batch_norm() and self.batch_norm_params()['center']:
+      new_params['use_bias'] = False
+    else:
+      new_params['use_bias'] = True
    new_params.update(**overrides)
    return new_params


--- a/research/object_detection/builders/model_builder.py
+++ b/research/object_detection/builders/model_builder.py
@@ -50,11 +50,6 @@ from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMo
 from object_detection.predictors import rfcn_box_predictor
 from object_detection.protos import model_pb2
 from object_detection.utils import ops
-# BEGIN GOOGLE-INTERNAL
-# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-# performance relative to a comparable Mask R-CNN model (b/112561592).
-from google3.image.understanding.object_detection.meta_architectures import ssd_mask_meta_arch
-# END GOOGLE-INTERNAL

 # A map of names to SSD feature extractors.
 SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
@@ -254,23 +249,6 @@ def _build_ssd_model(ssd_config, is_training, add_summaries,
        desired_negative_sampling_ratio)

  ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
-  # BEGIN GOOGLE-INTERNAL
-  # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-  # performance relative to a comparable Mask R-CNN model (b/112561592).
-  predictor_config = ssd_config.box_predictor
-  predict_instance_masks = False
-  if predictor_config.WhichOneof(
-      'box_predictor_oneof') == 'convolutional_box_predictor':
-    predict_instance_masks = (
-        predictor_config.convolutional_box_predictor.HasField('mask_head'))
-  elif predictor_config.WhichOneof(
-      'box_predictor_oneof') == 'weight_shared_convolutional_box_predictor':
-    predict_instance_masks = (
-        predictor_config.weight_shared_convolutional_box_predictor.HasField(
-            'mask_head'))
-  if predict_instance_masks:
-    ssd_meta_arch_fn = ssd_mask_meta_arch.SSDMaskMetaArch
-  # END GOOGLE-INTERNAL

  return ssd_meta_arch_fn(
      is_training=is_training,

--- a/research/object_detection/builders/model_builder_test.py
+++ b/research/object_detection/builders/model_builder_test.py
@@ -40,11 +40,6 @@ from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMo
 from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
 from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMobileNetV2FpnFeatureExtractor
 from object_detection.protos import model_pb2
-# BEGIN GOOGLE-INTERNAL
-# TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-# performance relative to a comparable Mask R-CNN model (b/112561592).
-from google3.image.understanding.object_detection.meta_architectures import ssd_mask_meta_arch
-# END GOOGLE-INTERNAL

 FRCNN_RESNET_FEAT_MAPS = {
    'faster_rcnn_resnet50':
@@ -169,161 +164,6 @@ class ModelBuilderTest(tf.test.TestCase, parameterized.TestCase):
            'desired_negative_sampling_ratio': 2
        })

-  # BEGIN GOOGLE-INTERNAL
-  # TODO(lzc): move ssd_mask_meta_arch to third party when it has decent
-  # performance relative to a comparable Mask R-CNN model (b/112561592).
-  def test_create_ssd_conv_predictor_model_with_mask(self):
-    model_text_proto = """
-      ssd {
-        feature_extractor {
-          type: 'ssd_inception_v2'
-          conv_hyperparams {
-            regularizer {
-                l2_regularizer {
-                }
-              }
-              initializer {
-                truncated_normal_initializer {
-                }
-              }
-          }
-          override_base_feature_extractor_hyperparams: true
-        }
-        box_coder {
-          faster_rcnn_box_coder {
-          }
-        }
-        matcher {
-          argmax_matcher {
-          }
-        }
-        similarity_calculator {
-          iou_similarity {
-          }
-        }
-        anchor_generator {
-          ssd_anchor_generator {
-            aspect_ratios: 1.0
-          }
-        }
-        image_resizer {
-          fixed_shape_resizer {
-            height: 320
-            width: 320
-          }
-        }
-        box_predictor {
-          convolutional_box_predictor {
-            mask_head {
-            }
-            conv_hyperparams {
-              regularizer {
-                l2_regularizer {
-                }
-              }
-              initializer {
-                truncated_normal_initializer {
-                }
-              }
-            }
-          }
-        }
-        loss {
-          classification_loss {
-            weighted_softmax {
-            }
-          }
-          localization_loss {
-            weighted_smooth_l1 {
-            }
-          }
-        }
-        use_expected_classification_loss_under_sampling: true
-        minimum_negative_sampling: 10
-        desired_negative_sampling_ratio: 2
-      }"""
-    model_proto = model_pb2.DetectionModel()
-    text_format.Merge(model_text_proto, model_proto)
-    model = self.create_model(model_proto)
-    self.assertIsInstance(model, ssd_mask_meta_arch.SSDMaskMetaArch)
-
-  def test_create_ssd_weight_shared_predictor_model_with_mask(self):
-    model_text_proto = """
-      ssd {
-        feature_extractor {
-          type: 'ssd_inception_v2'
-          conv_hyperparams {
-            regularizer {
-                l2_regularizer {
-                }
-              }
-              initializer {
-                truncated_normal_initializer {
-                }
-              }
-          }
-          override_base_feature_extractor_hyperparams: true
-        }
-        box_coder {
-          faster_rcnn_box_coder {
-          }
-        }
-        matcher {
-          argmax_matcher {
-          }
-        }
-        similarity_calculator {
-          iou_similarity {
-          }
-        }
-        anchor_generator {
-          ssd_anchor_generator {
-            aspect_ratios: 1.0
-          }
-        }
-        image_resizer {
-          fixed_shape_resizer {
-            height: 320
-            width: 320
-          }
-        }
-        box_predictor {
-          weight_shared_convolutional_box_predictor {
-            mask_head {
-            }
-            depth: 32
-            conv_hyperparams {
-              regularizer {
-                l2_regularizer {
-                }
-              }
-              initializer {
-                random_normal_initializer {
-                }
-              }
-            }
-            num_layers_before_predictor: 1
-          }
-        }
-        loss {
-          classification_loss {
-            weighted_softmax {
-            }
-          }
-          localization_loss {
-            weighted_smooth_l1 {
-            }
-          }
-        }
-        use_expected_classification_loss_under_sampling: true
-        minimum_negative_sampling: 10
-        desired_negative_sampling_ratio: 2
-      }"""
-    model_proto = model_pb2.DetectionModel()
-    text_format.Merge(model_text_proto, model_proto)
-    model = self.create_model(model_proto)
-    self.assertIsInstance(model, ssd_mask_meta_arch.SSDMaskMetaArch)
-  # END GOOGLE-INTERNAL

  def test_create_ssd_inception_v3_model_from_config(self):
    model_text_proto = """

--- a/research/object_detection/core/post_processing_test.py
+++ b/research/object_detection/core/post_processing_test.py
@@ -58,55 +58,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
      self.assertAllClose(nms_classes_output, exp_nms_classes)

  # TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
-  # BEGIN GOOGLE-INTERNAL
-  def test_multiclass_nms_select_with_shared_boxes_pad_to_max_output_size(self):
-    boxes = np.array([[[0, 0, 1, 1]],
-                      [[0, 0.1, 1, 1.1]],
-                      [[0, -0.1, 1, 0.9]],
-                      [[0, 10, 1, 11]],
-                      [[0, 10.1, 1, 11.1]],
-                      [[0, 100, 1, 101]],
-                      [[0, 1000, 1, 1002]],
-                      [[0, 1000, 1, 1002.1]]], np.float32)
-    scores = np.array([[.9, 0.01], [.75, 0.05],
-                       [.6, 0.01], [.95, 0],
-                       [.5, 0.01], [.3, 0.01],
-                       [.01, .85], [.01, .5]], np.float32)
-    score_thresh = 0.1
-    iou_thresh = .5
-    max_size_per_class = 4
-    max_output_size = 5
-
-    exp_nms_corners = [[0, 10, 1, 11],
-                       [0, 0, 1, 1],
-                       [0, 1000, 1, 1002],
-                       [0, 100, 1, 101]]
-    exp_nms_scores = [.95, .9, .85, .3]
-    exp_nms_classes = [0, 0, 1, 0]
-
-    def graph_fn(boxes, scores):
-      nms, num_valid_nms_boxes = post_processing.multiclass_non_max_suppression(
-          boxes,
-          scores,
-          score_thresh,
-          iou_thresh,
-          max_size_per_class,
-          max_total_size=max_output_size,
-          pad_to_max_output_size=True)
-      return [nms.get(), nms.get_field(fields.BoxListFields.scores),
-              nms.get_field(fields.BoxListFields.classes), num_valid_nms_boxes]
-
-    [nms_corners_output, nms_scores_output, nms_classes_output,
-     num_valid_nms_boxes] = self.execute(graph_fn, [boxes, scores])
-
-    self.assertEqual(num_valid_nms_boxes, 4)
-    self.assertAllClose(nms_corners_output[0:num_valid_nms_boxes],
-                        exp_nms_corners)
-    self.assertAllClose(nms_scores_output[0:num_valid_nms_boxes],
-                        exp_nms_scores)
-    self.assertAllClose(nms_classes_output[0:num_valid_nms_boxes],
-                        exp_nms_classes)
-  # END GOOGLE-INTERNAL

  def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
    boxes = tf.constant([[[0, 0, 1, 1]],
@@ -1126,61 +1077,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
      self.assertAllClose(num_detections, [1, 1])

  # TODO(bhattad): Remove conditional after CMLE moves to TF 1.9
-  # BEGIN GOOGLE-INTERNAL
-  def test_batch_multiclass_nms_with_use_static_shapes(self):
-    boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
-                       [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
-                       [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
-                       [[0, 10, 1, 11], [0, 10, 1, 11]]],
-                      [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
-                       [[0, 100, 1, 101], [0, 100, 1, 101]],
-                       [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
-                       [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
-                     np.float32)
-    scores = np.array([[[.9, 0.01], [.75, 0.05],
-                        [.6, 0.01], [.95, 0]],
-                       [[.5, 0.01], [.3, 0.01],
-                        [.01, .85], [.01, .5]]],
-                      np.float32)
-    clip_window = np.array([[0., 0., 5., 5.],
-                            [0., 0., 200., 200.]],
-                           np.float32)
-    score_thresh = 0.1
-    iou_thresh = .5
-    max_output_size = 4
-
-    exp_nms_corners = np.array([[[0, 0, 1, 1],
-                                 [0, 0, 0, 0],
-                                 [0, 0, 0, 0],
-                                 [0, 0, 0, 0]],
-                                [[0, 10.1, 1, 11.1],
-                                 [0, 100, 1, 101],
-                                 [0, 0, 0, 0],
-                                 [0, 0, 0, 0]]])
-    exp_nms_scores = np.array([[.9, 0., 0., 0.],
-                               [.5, .3, 0, 0]])
-    exp_nms_classes = np.array([[0, 0, 0, 0],
-                                [0, 0, 0, 0]])
-
-    def graph_fn(boxes, scores, clip_window):
-      (nmsed_boxes, nmsed_scores, nmsed_classes, _, _, num_detections
-      ) = post_processing.batch_multiclass_non_max_suppression(
-          boxes, scores, score_thresh, iou_thresh,
-          max_size_per_class=max_output_size, clip_window=clip_window,
-          use_static_shapes=True)
-      return nmsed_boxes, nmsed_scores, nmsed_classes, num_detections
-
-    (nmsed_boxes, nmsed_scores, nmsed_classes,
-     num_detections) = self.execute(graph_fn, [boxes, scores, clip_window])
-    for i in range(len(num_detections)):
-      self.assertAllClose(nmsed_boxes[i, 0:num_detections[i]],
-                          exp_nms_corners[i, 0:num_detections[i]])
-      self.assertAllClose(nmsed_scores[i, 0:num_detections[i]],
-                          exp_nms_scores[i, 0:num_detections[i]])
-      self.assertAllClose(nmsed_classes[i, 0:num_detections[i]],
-                          exp_nms_classes[i, 0:num_detections[i]])
-    self.assertAllClose(num_detections, [1, 2])
-  # END GOOGLE-INTERNAL

 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/core/preprocessor.py
+++ b/research/object_detection/core/preprocessor.py
@@ -811,8 +811,10 @@ def random_image_scale(image,
        image, [image_newysize, image_newxsize], align_corners=True)
    result.append(image)
    if masks is not None:
-      masks = tf.image.resize_nearest_neighbor(
-          masks, [image_newysize, image_newxsize], align_corners=True)
+      masks = tf.image.resize_images(
+          masks, [image_newysize, image_newxsize],
+          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+          align_corners=True)
      result.append(masks)
    return tuple(result)


--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -36,6 +36,7 @@ class InputDataFields(object):
    image: image.
    image_additional_channels: additional channels.
    original_image: image in the original input size.
+    original_image_spatial_shape: [height, width] of the original input image.
    key: unique key corresponding to image.
    source_id: source of the original image.
    filename: original filename of the dataset (without common path).
@@ -70,6 +71,7 @@ class InputDataFields(object):
  image = 'image'
  image_additional_channels = 'image_additional_channels'
  original_image = 'original_image'
+  original_image_spatial_shape = 'original_image_spatial_shape'
  key = 'key'
  source_id = 'source_id'
  filename = 'filename'

--- a/research/object_detection/data_decoders/tf_example_decoder.py
+++ b/research/object_detection/data_decoders/tf_example_decoder.py
@@ -322,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
@@ -365,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
    is_crowd = fields.InputDataFields.groundtruth_is_crowd
    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
    tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+        tensor_dict[fields.InputDataFields.image])[:2]
    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]


--- a/research/object_detection/data_decoders/tf_example_decoder_test.py
+++ b/research/object_detection/data_decoders/tf_example_decoder_test.py
@@ -97,13 +97,17 @@ class TfExampleDecoderTest(tf.test.TestCase):
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual(
-        (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
-        [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+                         get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                                     original_image_spatial_shape].
+                         get_shape().as_list()), [2])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                            original_image_spatial_shape])
    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

  def testDecodeImageKeyAndFilename(self):
@@ -141,13 +145,17 @@ class TfExampleDecoderTest(tf.test.TestCase):
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual(
-        (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
-        [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+                         get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                                     original_image_spatial_shape].
+                         get_shape().as_list()), [2])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                            original_image_spatial_shape])
    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

  def testDecodePngInstanceMasks(self):

--- a/research/object_detection/inputs.py
+++ b/research/object_detection/inputs.py
@@ -103,7 +103,8 @@ def transform_input_data(tensor_dict,

  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
-        tensor_dict[fields.InputDataFields.image], tf.uint8)
+        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
+        tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
@@ -199,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels
      ],
+      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
@@ -230,7 +232,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
-        None, None, 3 + num_additional_channels
+        height, width, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
@@ -364,7 +366,9 @@ def _get_features_dict(input_dict):
          input_dict[fields.InputDataFields.image],
      HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
      fields.InputDataFields.true_image_shape:
-          input_dict[fields.InputDataFields.true_image_shape]
+          input_dict[fields.InputDataFields.true_image_shape],
+      fields.InputDataFields.original_image_spatial_shape:
+          input_dict[fields.InputDataFields.original_image_spatial_shape]
  }
  if fields.InputDataFields.original_image in input_dict:
    features[fields.InputDataFields.original_image] = input_dict[
@@ -479,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
 def create_eval_input_fn(eval_config, eval_input_config, model_config):
  """Creates an eval `input` function for `Estimator`.

-  # TODO(ronnyvotel,rathodv): Allow batch sizes of more than 1 for eval.
-
  Args:
    eval_config: An eval_pb2.EvalConfig.
    eval_input_config: An input_reader_pb2.InputReader.
@@ -562,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
      return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))
    dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
        eval_input_config,
-        batch_size=1,  # Currently only support batch size of 1 for eval.
+        batch_size=params['batch_size'] if params else eval_config.batch_size,
        transform_input_data_fn=transform_and_pad_input_data_fn)
    return dataset


--- a/research/object_detection/inputs_test.py
+++ b/research/object_detection/inputs_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function

 import functools
 import os
+from absl.testing import parameterized

 import numpy as np
 import tensorflow as tf
@@ -28,6 +29,7 @@ from object_detection import inputs
 from object_detection.core import preprocessor
 from object_detection.core import standard_fields as fields
 from object_detection.utils import config_util
+from object_detection.utils import test_case

 FLAGS = tf.flags.FLAGS

@@ -64,7 +66,7 @@ def _make_initializable_iterator(dataset):
  return iterator


-class InputsTest(tf.test.TestCase):
+class InputsTest(test_case.TestCase, parameterized.TestCase):

  def test_faster_rcnn_resnet50_train_input(self):
    """Tests the training input function for FasterRcnnResnet50."""
@@ -103,52 +105,59 @@ class InputsTest(tf.test.TestCase):
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)

-  def test_faster_rcnn_resnet50_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for FasterRcnnResnet50."""
    configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
    model_config = configs['model']
    model_config.faster_rcnn.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_configs'][0], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, None, None, 3],
+    self.assertAllEqual([eval_batch_size, None, None, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, None, None, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
-        [1, 100, model_config.faster_rcnn.num_classes],
+        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
-        [1, 100, model_config.faster_rcnn.num_classes],
+        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
@@ -197,53 +206,60 @@ class InputsTest(tf.test.TestCase):
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)

-  def test_ssd_inceptionV2_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for SSDInceptionV2."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    model_config = configs['model']
    model_config.ssd.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_configs'][0], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, 300, 300, 3],
+    self.assertAllEqual([eval_batch_size, 300, 300, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, 300, 300, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
-        [1, 100, model_config.ssd.num_classes],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
-        [1, 100, model_config.ssd.num_classes],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[
            fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
@@ -379,7 +395,7 @@ class InputsTest(tf.test.TestCase):
    self.assertEqual(out_string, '2798129067578209328')


-class DataAugmentationFnTest(tf.test.TestCase):
+class DataAugmentationFnTest(test_case.TestCase):

  def test_apply_image_and_box_augmentation(self):
    data_augmentation_options = [
@@ -529,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
  return (image, mask, tf.shape(image))


-class DataTransformationFnTest(tf.test.TestCase):
+class DataTransformationFnTest(test_case.TestCase):

  def test_combine_additional_channels_if_present(self):
    image = np.random.rand(4, 4, 3).astype(np.float32)
@@ -622,7 +638,9 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_instance_masks:
            tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
        fields.InputDataFields.groundtruth_classes:
-            tf.constant(np.array([3, 1], np.int32))
+            tf.constant(np.array([3, 1], np.int32)),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.constant(np.array([4, 4], np.int32))
    }

    def fake_image_resizer_fn(image, masks=None):
@@ -649,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image].dtype, tf.uint8)
    self.assertAllEqual(transformed_inputs[
-        fields.InputDataFields.original_image].shape, [4, 4, 3])
+        fields.InputDataFields.original_image_spatial_shape], [4, 4])
+    self.assertAllEqual(transformed_inputs[
+        fields.InputDataFields.original_image].shape, [8, 8, 3])
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])

@@ -741,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
                        (np_image + 5) * 2)


-class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
+class PadInputDataToStaticShapesFnTest(test_case.TestCase):

  def test_pad_images_boxes_and_classes(self):
    input_tensor_dict = {
@@ -751,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
            tf.placeholder(tf.float32, [None, 4]),
        fields.InputDataFields.groundtruth_classes:
            tf.placeholder(tf.int32, [None, 3]),
-        fields.InputDataFields.true_image_shape: tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.true_image_shape:
+            tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.placeholder(tf.int32, [2])
    }
    padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
        tensor_dict=input_tensor_dict,
@@ -765,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.true_image_shape]
        .shape.as_list(), [3])
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
+        .shape.as_list(), [2])
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .shape.as_list(), [3, 4])

--- a/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
+++ b/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
@@ -504,13 +504,6 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
      for key in expected_shapes:
        self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])

-  # BEGIN GOOGLE-INTERNAL
-  # TODO(bhattad): Remove conditional after CMLE moves to TF 1.11
-  @parameterized.parameters(
-      {'use_static_shapes': False},
-      {'use_static_shapes': True}
-  )
-  # END GOOGLE-INTERNAL
  def test_predict_gives_correct_shapes_in_train_mode_both_stages(
      self,
      use_static_shapes=False):
@@ -1187,16 +1180,6 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
          'Loss/BoxClassifierLoss/classification_loss'], 0)
      self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)

-  # BEGIN GOOGLE-INTERNAL
-  # TODO(bhattad): Remove conditional after CMLE moves to TF 1.11
-  @parameterized.parameters(
-      {'use_static_shapes': False, 'shared_boxes': False},
-      {'use_static_shapes': False, 'shared_boxes': True},
-
-      {'use_static_shapes': True, 'shared_boxes': False},
-      {'use_static_shapes': True, 'shared_boxes': True},
-  )
-  # END GOOGLE-INTERNAL
  def test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images(
      self, use_static_shapes=False, shared_boxes=False):
    batch_size = 2

--- a/research/object_detection/meta_architectures/ssd_meta_arch.py
+++ b/research/object_detection/meta_architectures/ssd_meta_arch.py
@@ -125,12 +125,13 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
               depth_multiplier,
               min_depth,
               pad_to_multiple,
-               conv_hyperparams_config,
+               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               use_explicit_padding=False,
               use_depthwise=False,
-               override_base_feature_extractor_hyperparams=False):
+               override_base_feature_extractor_hyperparams=False,
+               name=None):
    """Constructor.

    Args:
@@ -139,9 +140,9 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
-      conv_hyperparams_config: A hyperparams.proto object containing
-        convolution hyperparameters for the layers added on top of the
-        base feature extractor.
+      conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
+        containing convolution hyperparameters for the layers added on top of
+        the base feature extractor.
      freeze_batchnorm: Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
@@ -156,14 +157,16 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_config`.
+      name: A string name scope to assign to the model. If 'None', Keras
+        will auto-generate one from the class name.
    """
-    super(SSDKerasFeatureExtractor, self).__init__()
+    super(SSDKerasFeatureExtractor, self).__init__(name=name)

    self._is_training = is_training
    self._depth_multiplier = depth_multiplier
    self._min_depth = min_depth
    self._pad_to_multiple = pad_to_multiple
-    self._conv_hyperparams_config = conv_hyperparams_config
+    self._conv_hyperparams = conv_hyperparams
    self._freeze_batchnorm = freeze_batchnorm
    self._inplace_batchnorm_update = inplace_batchnorm_update
    self._use_explicit_padding = use_explicit_padding

--- a/research/object_detection/meta_architectures/ssd_meta_arch_test.py
+++ b/research/object_detection/meta_architectures/ssd_meta_arch_test.py
@@ -210,60 +210,6 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
      self.assertAllClose(detections_out['num_detections'],
                          expected_num_detections)

-  # BEGIN GOOGLE-INTERNAL
-  # TODO(b/112621326): Remove conditional after CMLE moves to TF 1.11
-  def test_postprocess_results_are_correct_static(self, use_keras):
-    with tf.Graph().as_default():
-      _, _, _, _ = self._create_model(use_keras=use_keras)
-    def graph_fn(input_image):
-      model, _, _, _ = self._create_model(use_static_shapes=True,
-                                          nms_max_size_per_class=4)
-      preprocessed_inputs, true_image_shapes = model.preprocess(input_image)
-      prediction_dict = model.predict(preprocessed_inputs,
-                                      true_image_shapes)
-      detections = model.postprocess(prediction_dict, true_image_shapes)
-      return (detections['detection_boxes'], detections['detection_scores'],
-              detections['detection_classes'], detections['num_detections'])
-
-    batch_size = 2
-    image_size = 2
-    channels = 3
-    input_image = np.random.rand(batch_size, image_size, image_size,
-                                 channels).astype(np.float32)
-    expected_boxes = [
-        [
-            [0, 0, .5, .5],
-            [0, .5, .5, 1],
-            [.5, 0, 1, .5],
-            [0, 0, 0, 0]
-        ],  # padding
-        [
-            [0, 0, .5, .5],
-            [0, .5, .5, 1],
-            [.5, 0, 1, .5],
-            [0, 0, 0, 0]
-        ]
-    ]  # padding
-    expected_scores = [[0, 0, 0, 0], [0, 0, 0, 0]]
-    expected_classes = [[0, 0, 0, 0], [0, 0, 0, 0]]
-    expected_num_detections = np.array([3, 3])
-
-    (detection_boxes, detection_scores, detection_classes,
-     num_detections) = self.execute(graph_fn, [input_image])
-    for image_idx in range(batch_size):
-      self.assertTrue(test_utils.first_rows_close_as_set(
-          detection_boxes[image_idx][
-              0:expected_num_detections[image_idx]].tolist(),
-          expected_boxes[image_idx][0:expected_num_detections[image_idx]]))
-      self.assertAllClose(
-          detection_scores[image_idx][0:expected_num_detections[image_idx]],
-          expected_scores[image_idx][0:expected_num_detections[image_idx]])
-      self.assertAllClose(
-          detection_classes[image_idx][0:expected_num_detections[image_idx]],
-          expected_classes[image_idx][0:expected_num_detections[image_idx]])
-    self.assertAllClose(num_detections,
-                        expected_num_detections)
-  # END GOOGLE-INTERNAL

  def test_loss_results_are_correct(self, use_keras):


--- a/research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py
+++ b/research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py
@@ -67,7 +67,7 @@ class FakeSSDKerasFeatureExtractor(ssd_meta_arch.SSDKerasFeatureExtractor):
          depth_multiplier=0,
          min_depth=0,
          pad_to_multiple=1,
-          conv_hyperparams_config=None,
+          conv_hyperparams=None,
          freeze_batchnorm=False,
          inplace_batchnorm_update=False,
      )

--- a/research/object_detection/model_lib.py
+++ b/research/object_detection/model_lib.py
@@ -377,9 +377,14 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
      groundtruth = _prepare_groundtruth_for_eval(detection_model,
                                                  class_agnostic)
      use_original_images = fields.InputDataFields.original_image in features
-      eval_images = (
-          features[fields.InputDataFields.original_image]
-          if use_original_images else features[fields.InputDataFields.image])
+      if use_original_images:
+        eval_images = tf.cast(tf.image.resize_bilinear(
+            features[fields.InputDataFields.original_image][0:1],
+            features[fields.InputDataFields.original_image_spatial_shape][0]),
+                              tf.uint8)
+      else:
+        eval_images = features[fields.InputDataFields.image]
+
      eval_dict = eval_util.result_dict_for_single_example(
          eval_images[0:1],
          features[inputs.HASH_KEY][0],
@@ -520,8 +525,7 @@ def create_estimator_and_inputs(run_config,
  configs = get_configs_from_pipeline_file(pipeline_config_path)
  kwargs.update({
      'train_steps': train_steps,
-      'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples,
-      'retain_original_images_in_eval': False if use_tpu else True,
+      'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
  })
  if override_eval_num_epochs:
    kwargs.update({'eval_num_epochs': 1})
@@ -586,10 +590,6 @@ def create_estimator_and_inputs(run_config,
        use_tpu=use_tpu,
        config=run_config,
        # TODO(lzc): Remove conditional after CMLE moves to TF 1.9
-        # BEGIN GOOGLE-INTERNAL
-        export_to_tpu=export_to_tpu,
-        eval_on_tpu=False,  # Eval runs on CPU, so disable eval on TPU
-        # END GOOGLE-INTERNAL
        params=params if params else {})
  else:
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

--- a/research/object_detection/models/feature_map_generators.py
+++ b/research/object_detection/models/feature_map_generators.py
@@ -145,7 +145,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
    if 'use_depthwise' in feature_map_layout:
      use_depthwise = feature_map_layout['use_depthwise']
    for index, from_layer in enumerate(feature_map_layout['from_layer']):
-      net = tf.keras.Sequential(name='output_%d' % index)
+      net = []
      self.convolutions.append(net)
      layer_depth = feature_map_layout['layer_depth'][index]
      conv_kernel_size = 3
@@ -157,17 +157,17 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
        if insert_1x1_conv:
          layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
              base_from_layer, index, depth_fn(layer_depth / 2))
-          net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
-                                         [1, 1],
-                                         padding='SAME',
-                                         strides=1,
-                                         name=layer_name + '_conv',
-                                         **conv_hyperparams.params()))
-          net.add(
+          net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth / 2),
+                                            [1, 1],
+                                            padding='SAME',
+                                            strides=1,
+                                            name=layer_name + '_conv',
+                                            **conv_hyperparams.params()))
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name))

@@ -182,51 +182,52 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
          # conv_kernel_size, to avoid holding a reference to the loop variable
          # conv_kernel_size inside of a lambda function
          def fixed_padding(features, kernel_size=conv_kernel_size):
-            ops.fixed_padding(features, kernel_size)
-          net.add(tf.keras.layers.Lambda(fixed_padding))
+            return ops.fixed_padding(features, kernel_size)
+          net.append(tf.keras.layers.Lambda(fixed_padding))
        # TODO(rathodv): Add some utilities to simplify the creation of
        # Depthwise & non-depthwise convolutions w/ normalization & activations
        if use_depthwise:
-          net.add(tf.keras.layers.DepthwiseConv2D(
+          net.append(tf.keras.layers.DepthwiseConv2D(
              [conv_kernel_size, conv_kernel_size],
              depth_multiplier=1,
              padding=padding,
              strides=stride,
              name=layer_name + '_depthwise_conv',
              **conv_hyperparams.params()))
-          net.add(
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_depthwise_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name + '_depthwise'))

-          net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
-                                         padding='SAME',
-                                         strides=1,
-                                         name=layer_name + '_conv',
-                                         **conv_hyperparams.params()))
-          net.add(
+          net.append(tf.keras.layers.Conv2D(depth_fn(layer_depth), [1, 1],
+                                            padding='SAME',
+                                            strides=1,
+                                            name=layer_name + '_conv',
+                                            **conv_hyperparams.params()))
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name))

        else:
-          net.add(tf.keras.layers.Conv2D(depth_fn(layer_depth),
-                                         [conv_kernel_size, conv_kernel_size],
-                                         padding=padding,
-                                         strides=stride,
-                                         name=layer_name + '_conv',
-                                         **conv_hyperparams.params()))
-          net.add(
+          net.append(tf.keras.layers.Conv2D(
+              depth_fn(layer_depth),
+              [conv_kernel_size, conv_kernel_size],
+              padding=padding,
+              strides=stride,
+              name=layer_name + '_conv',
+              **conv_hyperparams.params()))
+          net.append(
              conv_hyperparams.build_batch_norm(
                  training=(is_training and not freeze_batchnorm),
                  name=layer_name + '_batchnorm'))
-          net.add(
+          net.append(
              conv_hyperparams.build_activation_layer(
                  name=layer_name))

@@ -252,8 +253,9 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
        feature_map_keys.append(from_layer)
      else:
        feature_map = feature_maps[-1]
-        feature_map = self.convolutions[index](feature_map)
-        layer_name = self.convolutions[index].layers[-1].name
+        for layer in self.convolutions[index]:
+          feature_map = layer(feature_map)
+        layer_name = self.convolutions[index][-1].name
        feature_map_keys.append(layer_name)
      feature_maps.append(feature_map)
    return collections.OrderedDict(

--- a/research/object_detection/models/feature_map_generators_test.py
+++ b/research/object_detection/models/feature_map_generators_test.py
@@ -118,8 +118,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
      self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
-  # BEGIN GOOGLE-INTERNAL
-  def test_get_expected_feature_map_shapes_with_inception_v2_use_depthwise(
+
+  def test_get_expected_feature_map_shapes_use_explicit_padding(
      self, use_keras):
    image_features = {
        'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
@@ -127,7 +127,7 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
        'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
    }
    layout_copy = INCEPTION_V2_LAYOUT.copy()
-    layout_copy['use_depthwise'] = True
+    layout_copy['use_explicit_padding'] = True
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=layout_copy,
        use_keras=use_keras
@@ -149,7 +149,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
-  # END GOOGLE-INTERNAL

  def test_get_expected_feature_map_shapes_with_inception_v3(self, use_keras):
    image_features = {
@@ -238,18 +237,18 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
    ])

    expected_keras_variables = set([
-        'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
-        'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
-        'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
-        'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
-        'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
-        'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
-        'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
-        'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
-        'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
-        'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
-        'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
-        'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
+        'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
+        'FeatureMaps/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
+        'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
+        'FeatureMaps/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
+        'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
+        'FeatureMaps/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
+        'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
+        'FeatureMaps/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
+        'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
+        'FeatureMaps/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
+        'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
+        'FeatureMaps/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
    ])

    init_op = tf.global_variables_initializer()
@@ -264,82 +263,6 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
        self.assertSetEqual(expected_slim_variables, actual_variable_set)

  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
-  # BEGIN GOOGLE-INTERNAL
-  def test_get_expected_variable_names_with_inception_v2_use_depthwise(
-      self,
-      use_keras):
-    image_features = {
-        'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
-        'Mixed_4c': tf.random_uniform([4, 14, 14, 576], dtype=tf.float32),
-        'Mixed_5c': tf.random_uniform([4, 7, 7, 1024], dtype=tf.float32)
-    }
-    layout_copy = INCEPTION_V2_LAYOUT.copy()
-    layout_copy['use_depthwise'] = True
-    feature_map_generator = self._build_feature_map_generator(
-        feature_map_layout=layout_copy,
-        use_keras=use_keras
-    )
-    feature_maps = feature_map_generator(image_features)
-
-    expected_slim_variables = set([
-        'Mixed_5c_1_Conv2d_3_1x1_256/weights',
-        'Mixed_5c_1_Conv2d_3_1x1_256/biases',
-        'Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise/depthwise_weights',
-        'Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise/biases',
-        'Mixed_5c_2_Conv2d_3_3x3_s2_512/weights',
-        'Mixed_5c_2_Conv2d_3_3x3_s2_512/biases',
-        'Mixed_5c_1_Conv2d_4_1x1_128/weights',
-        'Mixed_5c_1_Conv2d_4_1x1_128/biases',
-        'Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise/depthwise_weights',
-        'Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise/biases',
-        'Mixed_5c_2_Conv2d_4_3x3_s2_256/weights',
-        'Mixed_5c_2_Conv2d_4_3x3_s2_256/biases',
-        'Mixed_5c_1_Conv2d_5_1x1_128/weights',
-        'Mixed_5c_1_Conv2d_5_1x1_128/biases',
-        'Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise/depthwise_weights',
-        'Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise/biases',
-        'Mixed_5c_2_Conv2d_5_3x3_s2_256/weights',
-        'Mixed_5c_2_Conv2d_5_3x3_s2_256/biases',
-    ])
-
-    expected_keras_variables = set([
-        'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/kernel',
-        'FeatureMaps/output_3/Mixed_5c_1_Conv2d_3_1x1_256_conv/bias',
-        ('FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise_conv/'
-         'depthwise_kernel'),
-        ('FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_depthwise_conv/'
-         'bias'),
-        'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/kernel',
-        'FeatureMaps/output_3/Mixed_5c_2_Conv2d_3_3x3_s2_512_conv/bias',
-        'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/kernel',
-        'FeatureMaps/output_4/Mixed_5c_1_Conv2d_4_1x1_128_conv/bias',
-        ('FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise_conv/'
-         'depthwise_kernel'),
-        ('FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_depthwise_conv/'
-         'bias'),
-        'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/kernel',
-        'FeatureMaps/output_4/Mixed_5c_2_Conv2d_4_3x3_s2_256_conv/bias',
-        'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/kernel',
-        'FeatureMaps/output_5/Mixed_5c_1_Conv2d_5_1x1_128_conv/bias',
-        ('FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise_conv/'
-         'depthwise_kernel'),
-        ('FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_depthwise_conv/'
-         'bias'),
-        'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/kernel',
-        'FeatureMaps/output_5/Mixed_5c_2_Conv2d_5_3x3_s2_256_conv/bias',
-    ])
-
-    init_op = tf.global_variables_initializer()
-    with self.test_session() as sess:
-      sess.run(init_op)
-      sess.run(feature_maps)
-      actual_variable_set = set(
-          [var.op.name for var in tf.trainable_variables()])
-      if use_keras:
-        self.assertSetEqual(expected_keras_variables, actual_variable_set)
-      else:
-        self.assertSetEqual(expected_slim_variables, actual_variable_set)
-  # END GOOGLE-INTERNAL


 class FPNFeatureMapGeneratorTest(tf.test.TestCase):

--- a/research/object_detection/models/keras_applications/__init__.py
+++ b/research/object_detection/models/keras_applications/__init__.py
--- a/research/object_detection/models/keras_applications/mobilenet_v2.py
+++ b/research/object_detection/models/keras_applications/mobilenet_v2.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A wrapper around the MobileNet v2 models for Keras, for object detection."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from object_detection.core import freezable_batch_norm
+from object_detection.utils import ops
+
+
+# pylint: disable=invalid-name
+# This method copied from the slim mobilenet base network code (same license)
+def _make_divisible(v, divisor, min_value=None):
+  """Rounds `v` to a nearby multiple of `divisor`, subject to a lower bound.
+
+  Args:
+    v: The number to round.
+    divisor: The step the result is rounded to.
+    min_value: Lower bound for the rounded value. When `None`, `divisor` is
+      used as the lower bound.
+
+  Returns:
+    The larger of the lower bound and the nearest multiple of `divisor`,
+    increased by one `divisor` if it would otherwise fall below 90% of `v`.
+  """
+  lower_bound = divisor if min_value is None else min_value
+  # Adding half a divisor before truncating rounds to the nearest multiple.
+  nearest_multiple = (int(v + divisor / 2) // divisor) * divisor
+  rounded = max(lower_bound, nearest_multiple)
+  # Make sure that rounding down does not go down by more than 10%.
+  if rounded < 0.9 * v:
+    rounded += divisor
+  return rounded
+
+
+class _LayersOverride(object):
+  """Alternative Keras layers interface for the Keras MobileNetV2.
+
+  Layer constructors that are not overridden here are forwarded to
+  `tf.keras.layers` through `__getattr__`.
+  """
+
+  def __init__(self,
+               batchnorm_training,
+               default_batchnorm_momentum=0.999,
+               conv_hyperparams=None,
+               use_explicit_padding=False,
+               alpha=1.0,
+               min_depth=None):
+    """Alternative tf.keras.layers interface, for use by the Keras MobileNetV2.
+
+    It is used by the Keras applications kwargs injection API to
+    modify the Mobilenet v2 Keras application with changes required by
+    the Object Detection API.
+
+    These injected interfaces make the following changes to the network:
+
+    - Applies the Object Detection hyperparameter configuration
+    - Supports FreezableBatchNorms
+    - Adds support for a min number of filters for each layer
+    - Makes the `alpha` parameter affect the final convolution block even if it
+        is less than 1.0
+    - Adds support for explicit padding of convolutions
+
+    Args:
+      batchnorm_training: Bool. Assigned to Batch norm layer `training` param
+        when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
+      default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
+        batch norm layers will be constructed using this value as the momentum.
+      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for convolution ops. Optionally set to `None`
+        to use default mobilenet_v2 layer builders.
+      use_explicit_padding: If True, use 'valid' padding for convolutions,
+        but explicitly pre-pads inputs so that the output dimensions are the
+        same as if 'same' padding were used. Off by default.
+      alpha: The width multiplier referenced in the MobileNetV2 paper. It
+        modifies the number of filters in each convolutional layer.
+      min_depth: Minimum number of filters in the convolutional layers.
+    """
+    self._alpha = alpha
+    self._batchnorm_training = batchnorm_training
+    self._default_batchnorm_momentum = default_batchnorm_momentum
+    self._conv_hyperparams = conv_hyperparams
+    self._use_explicit_padding = use_explicit_padding
+    self._min_depth = min_depth
+
+  def _FixedPaddingLayer(self, kernel_size):
+    """Returns a Lambda layer applying `ops.fixed_padding` for `kernel_size`."""
+    return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
+
+  def Conv2D(self, filters, **kwargs):
+    """Builds a Conv2D layer according to the current Object Detection config.
+
+    Overrides the Keras MobileNetV2 application's convolutions with ones that
+    follow the spec specified by the Object Detection hyperparameters.
+
+    Args:
+      filters: The number of filters to use for the convolution.
+      **kwargs: Keyword args specified by the Keras application for
+        constructing the convolution.
+
+    Returns:
+      A one-arg callable that will either directly apply a Keras Conv2D layer to
+      the input argument, or that will first pad the input then apply a Conv2D
+      layer.
+    """
+    # The name is read with `.get`, so it may be absent. Normalize it to an
+    # empty string so the checks below never call a str method on `None`; an
+    # unnamed layer is treated as neither 'Conv_1' nor an 'expand' layer.
+    layer_name = kwargs.get('name') or ''
+
+    # Make sure 'alpha' is always applied to the last convolution block's size
+    # (This overrides the Keras application's functionality)
+    if layer_name == 'Conv_1' and self._alpha < 1.0:
+      filters = _make_divisible(1280 * self._alpha, 8)
+
+    # Apply the minimum depth to the convolution layers
+    if (self._min_depth and (filters < self._min_depth)
+        and not layer_name.endswith('expand')):
+      filters = self._min_depth
+
+    if self._conv_hyperparams:
+      kwargs = self._conv_hyperparams.params(**kwargs)
+
+    # Whatever padding the Keras application requested is replaced: 'same' by
+    # default, or 'valid' preceded by explicit fixed padding.
+    kwargs['padding'] = 'same'
+    # NOTE(review): assumes `kernel_size` arrives as an int keyword argument;
+    # it is compared with 1 and handed to `ops.fixed_padding` -- confirm.
+    kernel_size = kwargs.get('kernel_size')
+    if self._use_explicit_padding and kernel_size > 1:
+      kwargs['padding'] = 'valid'
+      def padded_conv(features):
+        padded_features = self._FixedPaddingLayer(kernel_size)(features)
+        return tf.keras.layers.Conv2D(filters, **kwargs)(padded_features)
+
+      return padded_conv
+    else:
+      return tf.keras.layers.Conv2D(filters, **kwargs)
+
+  def DepthwiseConv2D(self, **kwargs):
+    """Builds a DepthwiseConv2D according to the Object Detection config.
+
+    Overrides the Keras MobileNetV2 application's convolutions with ones that
+    follow the spec specified by the Object Detection hyperparameters.
+
+    Args:
+      **kwargs: Keyword args specified by the Keras application for
+        constructing the convolution.
+
+    Returns:
+      A one-arg callable that will either directly apply a Keras DepthwiseConv2D
+      layer to the input argument, or that will first pad the input then apply
+      the depthwise convolution.
+    """
+    if self._conv_hyperparams:
+      kwargs = self._conv_hyperparams.params(**kwargs)
+
+    # Same padding policy as `Conv2D`: 'same' unless explicit padding is on.
+    kwargs['padding'] = 'same'
+    # NOTE(review): assumes `kernel_size` arrives as an int keyword argument;
+    # it is compared with 1 and handed to `ops.fixed_padding` -- confirm.
+    kernel_size = kwargs.get('kernel_size')
+    if self._use_explicit_padding and kernel_size > 1:
+      kwargs['padding'] = 'valid'
+      def padded_depthwise_conv(features):
+        padded_features = self._FixedPaddingLayer(kernel_size)(features)
+        return tf.keras.layers.DepthwiseConv2D(**kwargs)(padded_features)
+
+      return padded_depthwise_conv
+    else:
+      return tf.keras.layers.DepthwiseConv2D(**kwargs)
+
+  def BatchNormalization(self, **kwargs):
+    """Builds a normalization layer.
+
+    Overrides the Keras application batch norm with the norm specified by the
+    Object Detection configuration.
+
+    Args:
+      **kwargs: Only the name is used, all other params ignored.
+        Required for matching `layers.BatchNormalization` calls in the Keras
+        application.
+
+    Returns:
+      A normalization layer specified by the Object Detection hyperparameter
+      configurations.
+    """
+    name = kwargs.get('name')
+    if self._conv_hyperparams:
+      return self._conv_hyperparams.build_batch_norm(
+          training=self._batchnorm_training,
+          name=name)
+    else:
+      # Without hyperparameters, fall back to a freezable batch norm that uses
+      # the default momentum supplied at construction time.
+      return freezable_batch_norm.FreezableBatchNorm(
+          training=self._batchnorm_training,
+          epsilon=1e-3,
+          momentum=self._default_batchnorm_momentum,
+          name=name)
+
+  def Input(self, shape):
+    """Builds an Input layer.
+
+    Overrides the Keras application Input layer with one that uses a
+    tf.placeholder_with_default instead of a tf.placeholder. This is necessary
+    to ensure the application works when run on a TPU.
+
+    Args:
+      shape: The shape for the input layer to use. (Does not include a dimension
+        for the batch size).
+    Returns:
+      An input layer for the specified shape that internally uses a
+      placeholder_with_default.
+    """
+    default_size = 224
+    default_batch_size = 1
+    shape = list(shape)
+    # Unknown (None) dimensions are filled with `default_size` only for the
+    # default value; the placeholder itself keeps the original `shape`.
+    default_shape = [default_size if dim is None else dim for dim in shape]
+
+    input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
+
+    placeholder_with_default = tf.placeholder_with_default(
+        input=input_tensor, shape=[None] + shape)
+    return tf.keras.layers.Input(tensor=placeholder_with_default)
+
+  # pylint: disable=unused-argument
+  def ReLU(self, *args, **kwargs):
+    """Builds an activation layer.
+
+    Overrides the Keras application ReLU with the activation specified by the
+    Object Detection configuration.
+
+    Args:
+      *args: Ignored, required to match the `tf.keras.layers.ReLU` interface
+      **kwargs: Only the name is used,
+        required to match `tf.keras.layers.ReLU` interface
+
+    Returns:
+      An activation layer specified by the Object Detection hyperparameter
+      configurations.
+    """
+    name = kwargs.get('name')
+    if self._conv_hyperparams:
+      return self._conv_hyperparams.build_activation_layer(name=name)
+    else:
+      return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
+  # pylint: enable=unused-argument
+
+  # pylint: disable=unused-argument
+  def ZeroPadding2D(self, **kwargs):
+    """Replaces explicit padding in the Keras application with a no-op.
+
+    Padding is decided by the `Conv2D` and `DepthwiseConv2D` overrides of this
+    class instead.
+
+    Args:
+      **kwargs: Ignored, required to match the Keras applications usage.
+
+    Returns:
+      A no-op identity lambda.
+    """
+    return lambda x: x
+  # pylint: enable=unused-argument
+
+  # Forward all non-overridden methods to the keras layers
+  def __getattr__(self, item):
+    return getattr(tf.keras.layers, item)
+
+
+def mobilenet_v2(batchnorm_training,
+                 default_batchnorm_momentum=0.9997,
+                 conv_hyperparams=None,
+                 use_explicit_padding=False,
+                 alpha=1.0,
+                 min_depth=None,
+                 **kwargs):
+  """Instantiates the MobileNetV2 architecture, modified for object detection.
+
+  This wraps the MobileNetV2 tensorflow Keras application, but uses the
+  Keras application's kwargs-based monkey-patching API to override the Keras
+  architecture with the following changes:
+
+  - Changes the default batchnorm momentum to 0.9997
+  - Applies the Object Detection hyperparameter configuration
+  - Supports FreezableBatchNorms
+  - Adds support for a min number of filters for each layer
+  - Makes the `alpha` parameter affect the final convolution block even if it
+      is less than 1.0
+  - Adds support for explicit padding of convolutions
+  - Makes the Input layer use a tf.placeholder_with_default instead of a
+      tf.placeholder, to work on TPUs.
+
+  Args:
+      batchnorm_training: Bool. Assigned to Batch norm layer `training` param
+        when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
+      default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
+        batch norm layers will be constructed using this value as the momentum.
+      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+        containing hyperparameters for convolution ops. Optionally set to `None`
+        to use default mobilenet_v2 layer builders.
+      use_explicit_padding: If True, use 'valid' padding for convolutions,
+        but explicitly pre-pads inputs so that the output dimensions are the
+        same as if 'same' padding were used. Off by default.
+      alpha: The width multiplier referenced in the MobileNetV2 paper. It
+        modifies the number of filters in each convolutional layer.
+      min_depth: Minimum number of filters in the convolutional layers.
+      **kwargs: Keyword arguments forwarded directly to the
+        `tf.keras.applications.MobilenetV2` method that constructs the Keras
+        model.
+
+  Returns:
+      A Keras model instance.
+  """
+  layers_override = _LayersOverride(
+      batchnorm_training,
+      default_batchnorm_momentum=default_batchnorm_momentum,
+      conv_hyperparams=conv_hyperparams,
+      use_explicit_padding=use_explicit_padding,
+      min_depth=min_depth,
+      alpha=alpha)
+  return tf.keras.applications.MobileNetV2(alpha=alpha,
+                                           layers=layers_override,
+                                           **kwargs)
+# pylint: enable=invalid-name
--- a/research/object_detection/models/keras_applications/mobilenet_v2_test.py
+++ b/research/object_detection/models/keras_applications/mobilenet_v2_test.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for mobilenet_v2."""
+import itertools
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import hyperparams_builder
+from object_detection.models.keras_applications import mobilenet_v2
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import test_case
+
+_layers_to_check = [
+    'Conv1_relu',
+    'block_1_expand_relu', 'block_1_depthwise_relu', 'block_1_project_BN',
+    'block_2_expand_relu', 'block_2_depthwise_relu', 'block_2_project_BN',
+    'block_3_expand_relu', 'block_3_depthwise_relu', 'block_3_project_BN',
+    'block_4_expand_relu', 'block_4_depthwise_relu', 'block_4_project_BN',
+    'block_5_expand_relu', 'block_5_depthwise_relu', 'block_5_project_BN',
+    'block_6_expand_relu', 'block_6_depthwise_relu', 'block_6_project_BN',
+    'block_7_expand_relu', 'block_7_depthwise_relu', 'block_7_project_BN',
+    'block_8_expand_relu', 'block_8_depthwise_relu', 'block_8_project_BN',
+    'block_9_expand_relu', 'block_9_depthwise_relu', 'block_9_project_BN',
+    'block_10_expand_relu', 'block_10_depthwise_relu', 'block_10_project_BN',
+    'block_11_expand_relu', 'block_11_depthwise_relu', 'block_11_project_BN',
+    'block_12_expand_relu', 'block_12_depthwise_relu', 'block_12_project_BN',
+    'block_13_expand_relu', 'block_13_depthwise_relu', 'block_13_project_BN',
+    'block_14_expand_relu', 'block_14_depthwise_relu', 'block_14_project_BN',
+    'block_15_expand_relu', 'block_15_depthwise_relu', 'block_15_project_BN',
+    'block_16_expand_relu', 'block_16_depthwise_relu', 'block_16_project_BN',
+    'out_relu']
+
+
+class MobilenetV2Test(test_case.TestCase):
+
+  def _build_conv_hyperparams(self):
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      activation: RELU_6
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+      batch_norm {
+        train: true,
+        scale: false,
+        center: true,
+        decay: 0.2,
+        epsilon: 0.1,
+      }
+    """
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
+
+  def _create_application_with_layer_outputs(
+      self, layer_names, batchnorm_training,
+      conv_hyperparams=None,
+      use_explicit_padding=False,
+      alpha=1.0,
+      min_depth=None):
+    """Constructs Keras mobilenetv2 that extracts intermediate layer outputs."""
+    if not layer_names:
+      layer_names = _layers_to_check
+    full_model = mobilenet_v2.mobilenet_v2(
+        batchnorm_training=batchnorm_training,
+        conv_hyperparams=conv_hyperparams,
+        weights=None,
+        use_explicit_padding=use_explicit_padding,
+        alpha=alpha,
+        min_depth=min_depth,
+        include_top=False)
+    layer_outputs = [full_model.get_layer(name=layer).output
+                     for layer in layer_names]
+    return tf.keras.Model(
+        inputs=full_model.inputs,
+        outputs=layer_outputs)
+
+  def _check_returns_correct_shape(
+      self, batch_size, image_height, image_width, depth_multiplier,
+      expected_feature_map_shapes, use_explicit_padding=False, min_depth=None,
+      layer_names=None):
+    def graph_fn(image_tensor):
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False, use_explicit_padding=use_explicit_padding,
+          min_depth=min_depth,
+          alpha=depth_multiplier)
+      return model(image_tensor)
+
+    image_tensor = np.random.rand(batch_size, image_height, image_width,
+                                  3).astype(np.float32)
+    feature_maps = self.execute(graph_fn, [image_tensor])
+
+    for feature_map, expected_shape in itertools.izip(
+        feature_maps, expected_feature_map_shapes):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _check_returns_correct_shapes_with_dynamic_inputs(
+      self, batch_size, image_height, image_width, depth_multiplier,
+      expected_feature_map_shapes, use_explicit_padding=False,
+      layer_names=None):
+    def graph_fn(image_height, image_width):
+      image_tensor = tf.random_uniform([batch_size, image_height, image_width,
+                                        3], dtype=tf.float32)
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False, use_explicit_padding=use_explicit_padding,
+          alpha=depth_multiplier)
+      return model(image_tensor)
+
+    feature_maps = self.execute_cpu(graph_fn, [
+        np.array(image_height, dtype=np.int32),
+        np.array(image_width, dtype=np.int32)
+    ])
+
+    for feature_map, expected_shape in itertools.izip(
+        feature_maps, expected_feature_map_shapes):
+      self.assertAllEqual(feature_map.shape, expected_shape)
+
+  def _get_variables(self, depth_multiplier, layer_names=None):
+    g = tf.Graph()
+    with g.as_default():
+      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
+      model = self._create_application_with_layer_outputs(
+          layer_names=layer_names,
+          batchnorm_training=False, use_explicit_padding=False,
+          alpha=depth_multiplier)
+      model(preprocessed_inputs)
+      return g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+
+  def test_returns_correct_shapes_128(self):
+    """Checks feature map shapes for a batch of two 128x128 images."""
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    # One expected output shape per layer, in the order of `_layers_to_check`.
+    expected_feature_map_shape = [(2, 64, 64, 32),  # Conv1_relu
+                                  (2, 64, 64, 96),
+                                  (2, 32, 32, 96),
+                                  (2, 32, 32, 24),
+                                  (2, 32, 32, 144),
+                                  (2, 32, 32, 144),
+                                  (2, 32, 32, 24),
+                                  (2, 32, 32, 144),
+                                  (2, 16, 16, 144),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 8, 8, 192),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 4, 4, 576),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 320),
+                                  (2, 4, 4, 1280)]  # out_relu
+
+    # The leading 2 is the batch size.
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape)
+
+  def test_returns_correct_shapes_128_explicit_padding(
+      self):
+    """Checks 128x128 feature map shapes with explicit padding enabled."""
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    # One expected output shape per layer, in the order of `_layers_to_check`.
+    expected_feature_map_shape = [(2, 64, 64, 32),  # Conv1_relu
+                                  (2, 64, 64, 96),
+                                  (2, 32, 32, 96),
+                                  (2, 32, 32, 24),
+                                  (2, 32, 32, 144),
+                                  (2, 32, 32, 144),
+                                  (2, 32, 32, 24),
+                                  (2, 32, 32, 144),
+                                  (2, 16, 16, 144),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 8, 8, 192),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 4, 4, 576),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 320),
+                                  (2, 4, 4, 1280)]  # out_relu
+    # The leading 2 is the batch size.
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape, use_explicit_padding=True)
+
+  def test_returns_correct_shapes_with_dynamic_inputs(
+      self):
+    """Checks 128x128 feature map shapes when height/width are fed at run time."""
+    image_height = 128
+    image_width = 128
+    depth_multiplier = 1.0
+    # One expected output shape per layer, in the order of `_layers_to_check`.
+    expected_feature_map_shape = [(2, 64, 64, 32),  # Conv1_relu
+                                  (2, 64, 64, 96),
+                                  (2, 32, 32, 96),
+                                  (2, 32, 32, 24),
+                                  (2, 32, 32, 144),
+                                  (2, 32, 32, 144),
+                                  (2, 32, 32, 24),
+                                  (2, 32, 32, 144),
+                                  (2, 16, 16, 144),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 192),
+                                  (2, 16, 16, 32),
+                                  (2, 16, 16, 192),
+                                  (2, 8, 8, 192),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 64),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 384),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 576),
+                                  (2, 8, 8, 96),
+                                  (2, 8, 8, 576),
+                                  (2, 4, 4, 576),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 160),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 960),
+                                  (2, 4, 4, 320),
+                                  (2, 4, 4, 1280)]  # out_relu
+    # The leading 2 is the batch size.
+    self._check_returns_correct_shapes_with_dynamic_inputs(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape)
+
+  def test_returns_correct_shapes_299(self):
+    """Checks feature map shapes for a batch of two 299x299 images."""
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 1.0
+    # One expected output shape per layer, in the order of `_layers_to_check`.
+    expected_feature_map_shape = [(2, 150, 150, 32),  # Conv1_relu
+                                  (2, 150, 150, 96),
+                                  (2, 75, 75, 96),
+                                  (2, 75, 75, 24),
+                                  (2, 75, 75, 144),
+                                  (2, 75, 75, 144),
+                                  (2, 75, 75, 24),
+                                  (2, 75, 75, 144),
+                                  (2, 38, 38, 144),
+                                  (2, 38, 38, 32),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 32),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 32),
+                                  (2, 38, 38, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 64),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 64),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 64),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 64),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 384),
+                                  (2, 19, 19, 96),
+                                  (2, 19, 19, 576),
+                                  (2, 19, 19, 576),
+                                  (2, 19, 19, 96),
+                                  (2, 19, 19, 576),
+                                  (2, 19, 19, 576),
+                                  (2, 19, 19, 96),
+                                  (2, 19, 19, 576),
+                                  (2, 10, 10, 576),
+                                  (2, 10, 10, 160),
+                                  (2, 10, 10, 960),
+                                  (2, 10, 10, 960),
+                                  (2, 10, 10, 160),
+                                  (2, 10, 10, 960),
+                                  (2, 10, 10, 960),
+                                  (2, 10, 10, 160),
+                                  (2, 10, 10, 960),
+                                  (2, 10, 10, 960),
+                                  (2, 10, 10, 320),
+                                  (2, 10, 10, 1280)]  # out_relu
+
+    # The leading 2 is the batch size.
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape)
+
+  def test_returns_correct_shapes_enforcing_min_depth(
+      self):
+    """Checks 299x299 feature map shapes with a tiny alpha and min_depth=32.
+
+    With a depth multiplier of 0.5**12 and min_depth=32, every checked layer
+    is expected to have either 32 or 192 channels.
+    """
+    image_height = 299
+    image_width = 299
+    depth_multiplier = 0.5**12
+    # One expected output shape per layer, in the order of `_layers_to_check`.
+    expected_feature_map_shape = [(2, 150, 150, 32),  # Conv1_relu
+                                  (2, 150, 150, 192),
+                                  (2, 75, 75, 192),
+                                  (2, 75, 75, 32),
+                                  (2, 75, 75, 192),
+                                  (2, 75, 75, 192),
+                                  (2, 75, 75, 32),
+                                  (2, 75, 75, 192),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 32),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 32),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 192),
+                                  (2, 38, 38, 32),
+                                  (2, 38, 38, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 192),
+                                  (2, 19, 19, 32),
+                                  (2, 19, 19, 192),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 32),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 32),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 32),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 192),
+                                  (2, 10, 10, 32),
+                                  (2, 10, 10, 32)]  # out_relu
+    # The leading 2 is the batch size.
+    self._check_returns_correct_shape(
+        2, image_height, image_width, depth_multiplier,
+        expected_feature_map_shape, min_depth=32)
+
+  def test_hyperparam_override(self):
+    hyperparams = self._build_conv_hyperparams()
+    model = mobilenet_v2.mobilenet_v2(
+        batchnorm_training=True,
+        conv_hyperparams=hyperparams,
+        weights=None,
+        use_explicit_padding=False,
+        alpha=1.0,
+        min_depth=32,
+        include_top=False)
+    hyperparams.params()
+    bn_layer = model.get_layer(name='block_5_project_BN')
+    self.assertAllClose(bn_layer.momentum, 0.2)
+    self.assertAllClose(bn_layer.epsilon, 0.1)
+
+  def test_variable_count(self):
+    depth_multiplier = 1
+    variables = self._get_variables(depth_multiplier)
+    self.assertEqual(len(variables), 260)
+
+
+# Run the tests through TensorFlow's test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()