Commit afd5579f authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into context_tf2

parents dcd96e02 567bd18d
@@ -36,7 +36,7 @@ def normalize_image(image, original_minval, original_maxval, target_minval,
   Returns:
     image: image which is the same shape as input image.
   """
-  with tf.name_scope('NormalizeImage', values=[image]):
+  with tf.compat.v1.name_scope('NormalizeImage', values=[image]):
     original_minval = float(original_minval)
     original_maxval = float(original_maxval)
     target_minval = float(target_minval)
@@ -68,16 +68,17 @@ def generate_tfexample_image(input_example_strings,
     A tensor with shape [batch_size, height, width, channels] of type float32
     with values in the range [0..1]
   """
-  batch_size = tf.shape(input_example_strings)[0]
+  batch_size = tf.shape(input=input_example_strings)[0]
   images_shape = tf.stack(
       [batch_size, image_height, image_width, image_channels])
   tf_example_image_key = 'image/encoded'
   feature_configs = {
       tf_example_image_key:
-          tf.FixedLenFeature(
+          tf.io.FixedLenFeature(
               image_height * image_width * image_channels, dtype=tf.float32)
   }
-  feature_tensors = tf.parse_example(input_example_strings, feature_configs)
+  feature_tensors = tf.io.parse_example(
+      serialized=input_example_strings, features=feature_configs)
   float_images = tf.reshape(
       normalize_image(
           feature_tensors[tf_example_image_key],
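For readers unfamiliar with the `tf.io` parsing path used above, here is a minimal, self-contained sketch (not part of the commit) of how a serialized `tf.train.Example` carrying flattened float pixels under `'image/encoded'` round-trips through `tf.io.FixedLenFeature` and `tf.io.parse_example`; the sizes are made-up placeholders:

```python
import tensorflow as tf

# Hypothetical tiny image; the real sizes come from the dataset definition.
height, width, channels = 2, 2, 1

example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': tf.train.Feature(
        float_list=tf.train.FloatList(value=[0.0, 0.25, 0.5, 1.0]))
}))
serialized = tf.constant([example.SerializeToString()])

feature_configs = {
    'image/encoded': tf.io.FixedLenFeature(
        [height * width * channels], dtype=tf.float32)
}
parsed = tf.io.parse_example(serialized=serialized, features=feature_configs)
images = tf.reshape(parsed['image/encoded'], [-1, height, width, channels])
print(images.shape)  # (1, 2, 2, 1)
```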
@@ -97,11 +98,11 @@ def attention_ocr_attention_masks(num_characters):
   names = ['%s/Softmax:0' % (prefix)]
   for i in range(1, num_characters):
     names += ['%s_%d/Softmax:0' % (prefix, i)]
-  return [tf.get_default_graph().get_tensor_by_name(n) for n in names]
+  return [tf.compat.v1.get_default_graph().get_tensor_by_name(n) for n in names]

 def build_tensor_info(tensor_dict):
   return {
-      k: tf.saved_model.utils.build_tensor_info(t)
+      k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
       for k, t in tensor_dict.items()
   }
@@ -29,7 +29,7 @@ _CHECKPOINT_URL = (

 def _clean_up():
-  tf.gfile.DeleteRecursively(tf.test.get_temp_dir())
+  tf.io.gfile.rmtree(tf.compat.v1.test.get_temp_dir())

 def _create_tf_example_string(image):
@@ -47,7 +47,7 @@ class AttentionOcrExportTest(tf.test.TestCase):
     for suffix in ['.meta', '.index', '.data-00000-of-00001']:
       filename = _CHECKPOINT + suffix
       self.assertTrue(
-          tf.gfile.Exists(filename),
+          tf.io.gfile.exists(filename),
           msg='Missing checkpoint file %s. '
           'Please download and extract it from %s' %
           (filename, _CHECKPOINT_URL))
@@ -57,7 +57,8 @@ class AttentionOcrExportTest(tf.test.TestCase):
         os.path.dirname(__file__), 'datasets/testdata/fsns')
     tf.test.TestCase.setUp(self)
     _clean_up()
-    self.export_dir = os.path.join(tf.test.get_temp_dir(), 'exported_model')
+    self.export_dir = os.path.join(
+        tf.compat.v1.test.get_temp_dir(), 'exported_model')
     self.minimal_output_signature = {
         'predictions': 'AttentionOcr_v1/predicted_chars:0',
         'scores': 'AttentionOcr_v1/predicted_scores:0',
@@ -93,10 +94,10 @@ class AttentionOcrExportTest(tf.test.TestCase):
             size=self.dataset.image_shape).astype('uint8'),
     }
     signature_def = graph_def.signature_def[
-        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
     if serving:
       input_name = signature_def.inputs[
-          tf.saved_model.signature_constants.CLASSIFY_INPUTS].name
+          tf.saved_model.CLASSIFY_INPUTS].name
       # Model for serving takes input: inputs['inputs'] = 'tf_example:0'
       feed_dict = {
           input_name: [
@@ -126,11 +127,11 @@ class AttentionOcrExportTest(tf.test.TestCase):
       export_for_serving: True if the model was exported for Serving. This
         affects how input is fed into the model.
     """
-    tf.reset_default_graph()
-    sess = tf.Session()
-    graph_def = tf.saved_model.loader.load(
+    tf.compat.v1.reset_default_graph()
+    sess = tf.compat.v1.Session()
+    graph_def = tf.compat.v1.saved_model.loader.load(
         sess=sess,
-        tags=[tf.saved_model.tag_constants.SERVING],
+        tags=[tf.saved_model.SERVING],
         export_dir=self.export_dir)
     feed_dict = self.create_input_feed(graph_def, export_for_serving)
     results = sess.run(self.minimal_output_signature, feed_dict=feed_dict)
......
@@ -52,7 +52,7 @@ class ModelTest(tf.test.TestCase):
                                     self.num_char_classes)
     self.length_logit_shape = (self.batch_size, self.seq_length + 1)
     # Placeholder knows image dimensions, but not batch size.
-    self.input_images = tf.placeholder(
+    self.input_images = tf.compat.v1.placeholder(
         tf.float32,
         shape=(None, self.image_height, self.image_width, 3),
         name='input_node')
@@ -89,8 +89,8 @@ class ModelTest(tf.test.TestCase):
     with self.test_session() as sess:
       endpoints_tf = ocr_model.create_base(
           images=self.input_images, labels_one_hot=None)
-      sess.run(tf.global_variables_initializer())
-      tf.tables_initializer().run()
+      sess.run(tf.compat.v1.global_variables_initializer())
+      tf.compat.v1.tables_initializer().run()
       endpoints = sess.run(
           endpoints_tf, feed_dict={self.input_images: self.fake_images})
@@ -127,7 +127,7 @@ class ModelTest(tf.test.TestCase):
       ocr_model = self.create_model()
       conv_tower = ocr_model.conv_tower_fn(self.input_images)
-      sess.run(tf.global_variables_initializer())
+      sess.run(tf.compat.v1.global_variables_initializer())
       conv_tower_np = sess.run(
           conv_tower, feed_dict={self.input_images: self.fake_images})
@@ -141,9 +141,9 @@ class ModelTest(tf.test.TestCase):
     ocr_model = self.create_model()
     ocr_model.create_base(images=self.input_images, labels_one_hot=None)
     with self.test_session() as sess:
-      tfprof_root = tf.profiler.profile(
+      tfprof_root = tf.compat.v1.profiler.profile(
           sess.graph,
-          options=tf.profiler.ProfileOptionBuilder
+          options=tf.compat.v1.profiler.ProfileOptionBuilder
           .trainable_variables_parameter())
     model_size_bytes = 4 * tfprof_root.total_parameters
@@ -163,9 +163,9 @@ class ModelTest(tf.test.TestCase):
     summaries = ocr_model.create_summaries(
         data, endpoints, charset, is_training=False)
     with self.test_session() as sess:
-      sess.run(tf.global_variables_initializer())
-      sess.run(tf.local_variables_initializer())
-      tf.tables_initializer().run()
+      sess.run(tf.compat.v1.global_variables_initializer())
+      sess.run(tf.compat.v1.local_variables_initializer())
+      tf.compat.v1.tables_initializer().run()
       sess.run(summaries)  # just check it is runnable

   def test_sequence_loss_function_without_label_smoothing(self):
@@ -188,7 +188,7 @@ class ModelTest(tf.test.TestCase):
     Returns:
       a list of tensors with encoded image coordinates in them.
     """
-    batch_size = tf.shape(net)[0]
+    batch_size = tf.shape(input=net)[0]
     _, h, w, _ = net.shape.as_list()
     h_loc = [
         tf.tile(
@@ -200,7 +200,8 @@ class ModelTest(tf.test.TestCase):
     h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
     w_loc = [
         tf.tile(
-            tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
+            tf.contrib.layers.one_hot_encoding(
+                tf.constant([i]), num_classes=w),
             [h, 1]) for i in range(w)
     ]
     w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
@@ -272,8 +273,8 @@ class ModelTest(tf.test.TestCase):
       endpoints_tf = ocr_model.create_base(
           images=self.fake_images, labels_one_hot=None)
-      sess.run(tf.global_variables_initializer())
-      tf.tables_initializer().run()
+      sess.run(tf.compat.v1.global_variables_initializer())
+      tf.compat.v1.tables_initializer().run()
       endpoints = sess.run(endpoints_tf)
     self.assertEqual(endpoints.predicted_text.shape, (self.batch_size,))
@@ -289,7 +290,7 @@ class CharsetMapperTest(tf.test.TestCase):
     charset_mapper = model.CharsetMapper(charset)
     with self.test_session() as sess:
-      tf.tables_initializer().run()
+      tf.compat.v1.tables_initializer().run()
       text = sess.run(charset_mapper.get_text(ids))
     self.assertAllEqual(text, [b'hello', b'world'])
......
@@ -111,12 +111,12 @@ class SequenceLayerBase(object):
     self._mparams = method_params
     self._net = net
     self._labels_one_hot = labels_one_hot
-    self._batch_size = tf.shape(net)[0]
+    self._batch_size = tf.shape(input=net)[0]
     # Initialize parameters for char logits which will be computed on the fly
     # inside an LSTM decoder.
     self._char_logits = {}
-    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
+    regularizer = tf.keras.regularizers.l2(0.5 * (self._mparams.weight_decay))
     self._softmax_w = slim.model_variable(
         'softmax_w',
         [self._mparams.num_lstm_units, self._params.num_char_classes],
@@ -124,7 +124,7 @@ class SequenceLayerBase(object):
         regularizer=regularizer)
     self._softmax_b = slim.model_variable(
         'softmax_b', [self._params.num_char_classes],
-        initializer=tf.zeros_initializer(),
+        initializer=tf.compat.v1.zeros_initializer(),
         regularizer=regularizer)

   @abc.abstractmethod
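The regularizer swap above is not a pure renaming: `slim.l2_regularizer(scale)` multiplies `scale` by `tf.nn.l2_loss(w)`, which already contains a factor of 1/2, whereas `tf.keras.regularizers.l2(l)` computes `l * sum(w**2)` with no 1/2. The extra `0.5 *` therefore keeps the penalty value unchanged. A small sketch (illustrative values only, not from the commit) checking the equivalence:

```python
import tensorflow as tf

weight_decay = 0.004  # example value only
w = tf.constant([[1.0, -2.0], [3.0, 0.5]])

# New style, as in the diff above.
keras_penalty = tf.keras.regularizers.l2(0.5 * weight_decay)(w)

# What slim.l2_regularizer(weight_decay) used to compute.
slim_equivalent = weight_decay * tf.nn.l2_loss(w)

tf.debugging.assert_near(keras_penalty, slim_equivalent)  # both == 0.0285
```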
@@ -203,8 +203,8 @@ class SequenceLayerBase(object):
       A tensor with shape [batch_size, num_char_classes]
     """
     if char_index not in self._char_logits:
-      self._char_logits[char_index] = tf.nn.xw_plus_b(inputs, self._softmax_w,
-                                                      self._softmax_b)
+      self._char_logits[char_index] = tf.compat.v1.nn.xw_plus_b(inputs, self._softmax_w,
+                                                                self._softmax_b)
     return self._char_logits[char_index]

   def char_one_hot(self, logit):
@@ -216,7 +216,7 @@ class SequenceLayerBase(object):
     Returns:
       A tensor with shape [batch_size, num_char_classes]
     """
-    prediction = tf.argmax(logit, axis=1)
+    prediction = tf.argmax(input=logit, axis=1)
     return slim.one_hot_encoding(prediction, self._params.num_char_classes)

   def get_input(self, prev, i):
@@ -244,10 +244,10 @@ class SequenceLayerBase(object):
     Returns:
       A tensor with shape [batch_size, seq_length, num_char_classes].
     """
-    with tf.variable_scope('LSTM'):
+    with tf.compat.v1.variable_scope('LSTM'):
       first_label = self.get_input(prev=None, i=0)
       decoder_inputs = [first_label] + [None] * (self._params.seq_length - 1)
-      lstm_cell = tf.contrib.rnn.LSTMCell(
+      lstm_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(
           self._mparams.num_lstm_units,
           use_peepholes=False,
           cell_clip=self._mparams.lstm_state_clip_value,
@@ -259,9 +259,9 @@ class SequenceLayerBase(object):
           loop_function=self.get_input,
           cell=lstm_cell)
-    with tf.variable_scope('logits'):
+    with tf.compat.v1.variable_scope('logits'):
       logits_list = [
-          tf.expand_dims(self.char_logit(logit, i), dim=1)
+          tf.expand_dims(self.char_logit(logit, i), axis=1)
          for i, logit in enumerate(lstm_outputs)
      ]
......
@@ -29,13 +29,13 @@ import sequence_layers

 def fake_net(batch_size, num_features, feature_size):
   return tf.convert_to_tensor(
-      np.random.uniform(size=(batch_size, num_features, feature_size)),
+      value=np.random.uniform(size=(batch_size, num_features, feature_size)),
       dtype=tf.float32)

 def fake_labels(batch_size, seq_length, num_char_classes):
   labels_np = tf.convert_to_tensor(
-      np.random.randint(
+      value=np.random.randint(
           low=0, high=num_char_classes, size=(batch_size, seq_length)))
   return slim.one_hot_encoding(labels_np, num_classes=num_char_classes)
......
@@ -96,16 +96,16 @@ def get_training_hparams():

 def create_optimizer(hparams):
   """Creates optimized based on the specified flags."""
   if hparams.optimizer == 'momentum':
-    optimizer = tf.train.MomentumOptimizer(
+    optimizer = tf.compat.v1.train.MomentumOptimizer(
         hparams.learning_rate, momentum=hparams.momentum)
   elif hparams.optimizer == 'adam':
-    optimizer = tf.train.AdamOptimizer(hparams.learning_rate)
+    optimizer = tf.compat.v1.train.AdamOptimizer(hparams.learning_rate)
   elif hparams.optimizer == 'adadelta':
-    optimizer = tf.train.AdadeltaOptimizer(hparams.learning_rate)
+    optimizer = tf.compat.v1.train.AdadeltaOptimizer(hparams.learning_rate)
   elif hparams.optimizer == 'adagrad':
-    optimizer = tf.train.AdagradOptimizer(hparams.learning_rate)
+    optimizer = tf.compat.v1.train.AdagradOptimizer(hparams.learning_rate)
   elif hparams.optimizer == 'rmsprop':
-    optimizer = tf.train.RMSPropOptimizer(
+    optimizer = tf.compat.v1.train.RMSPropOptimizer(
         hparams.learning_rate, momentum=hparams.momentum)
   return optimizer
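The hunk above keeps the graph-mode optimizers, now addressed through `tf.compat.v1.train`. Purely for comparison, and not what this commit does, here is a hedged sketch of the same selection written against the native TF2 Keras optimizers, assuming the same hparams fields (`optimizer`, `learning_rate`, `momentum`):

```python
import tensorflow as tf

def create_keras_optimizer(hparams):
  """Illustrative TF2-native counterpart of create_optimizer (not from the commit)."""
  if hparams.optimizer == 'momentum':
    return tf.keras.optimizers.SGD(
        learning_rate=hparams.learning_rate, momentum=hparams.momentum)
  elif hparams.optimizer == 'adam':
    return tf.keras.optimizers.Adam(learning_rate=hparams.learning_rate)
  elif hparams.optimizer == 'adadelta':
    return tf.keras.optimizers.Adadelta(learning_rate=hparams.learning_rate)
  elif hparams.optimizer == 'adagrad':
    return tf.keras.optimizers.Adagrad(learning_rate=hparams.learning_rate)
  elif hparams.optimizer == 'rmsprop':
    return tf.keras.optimizers.RMSprop(
        learning_rate=hparams.learning_rate, momentum=hparams.momentum)
```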
@@ -154,14 +154,14 @@ def train(loss, init_fn, hparams):

 def prepare_training_dir():
-  if not tf.gfile.Exists(FLAGS.train_log_dir):
+  if not tf.io.gfile.exists(FLAGS.train_log_dir):
     logging.info('Create a new training directory %s', FLAGS.train_log_dir)
-    tf.gfile.MakeDirs(FLAGS.train_log_dir)
+    tf.io.gfile.makedirs(FLAGS.train_log_dir)
   else:
     if FLAGS.reset_train_dir:
       logging.info('Reset the training directory %s', FLAGS.train_log_dir)
-      tf.gfile.DeleteRecursively(FLAGS.train_log_dir)
-      tf.gfile.MakeDirs(FLAGS.train_log_dir)
+      tf.io.gfile.rmtree(FLAGS.train_log_dir)
+      tf.io.gfile.makedirs(FLAGS.train_log_dir)
     else:
       logging.info('Use already existing training directory %s',
                    FLAGS.train_log_dir)
@@ -169,7 +169,7 @@ def prepare_training_dir():

 def calculate_graph_metrics():
   param_stats = model_analyzer.print_model_analysis(
-      tf.get_default_graph(),
+      tf.compat.v1.get_default_graph(),
       tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
   return param_stats.total_parameters
@@ -186,7 +186,7 @@ def main(_):
   # If ps_tasks is zero, the local device is used. When using multiple
   # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
   # across the different devices.
-  device_setter = tf.train.replica_device_setter(
+  device_setter = tf.compat.v1.train.replica_device_setter(
       FLAGS.ps_tasks, merge_devices=True)
   with tf.device(device_setter):
     data = data_provider.get_data(
......
@@ -37,16 +37,16 @@ def logits_to_log_prob(logits):
     probabilities.
   """
-  with tf.variable_scope('log_probabilities'):
+  with tf.compat.v1.variable_scope('log_probabilities'):
     reduction_indices = len(logits.shape.as_list()) - 1
     max_logits = tf.reduce_max(
-        logits, reduction_indices=reduction_indices, keep_dims=True)
+        input_tensor=logits, axis=reduction_indices, keepdims=True)
     safe_logits = tf.subtract(logits, max_logits)
     sum_exp = tf.reduce_sum(
-        tf.exp(safe_logits),
-        reduction_indices=reduction_indices,
-        keep_dims=True)
+        input_tensor=tf.exp(safe_logits),
+        axis=reduction_indices,
+        keepdims=True)
-    log_probs = tf.subtract(safe_logits, tf.log(sum_exp))
+    log_probs = tf.subtract(safe_logits, tf.math.log(sum_exp))
     return log_probs
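`logits_to_log_prob` is a numerically stable log-softmax: subtracting the per-row maximum before exponentiating avoids overflow, and the result is unchanged because the subtracted max cancels in the normalization. A quick NumPy check of the same steps (a sketch only, assuming 2-D logits):

```python
import numpy as np

logits = np.array([[2.0, 1.0, 0.1],
                   [1000.0, 999.0, 998.0]])  # naive exp() would overflow here

# Same steps as logits_to_log_prob: subtract the max, then log-sum-exp.
max_logits = logits.max(axis=-1, keepdims=True)
safe_logits = logits - max_logits
log_probs = safe_logits - np.log(np.exp(safe_logits).sum(axis=-1, keepdims=True))

# Each row of exp(log_probs) sums to one, even for the huge-logit row.
print(np.exp(log_probs).sum(axis=-1))  # -> [1. 1.]
```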
@@ -91,7 +91,7 @@ def ConvertAllInputsToTensors(func):
   """
   def FuncWrapper(*args):
-    tensors = [tf.convert_to_tensor(a) for a in args]
+    tensors = [tf.convert_to_tensor(value=a) for a in args]
     return func(*tensors)
   return FuncWrapper
@@ -109,6 +109,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### MobileDet GPU
We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP
higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson
Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### Context R-CNN

We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
......
@@ -39,6 +39,7 @@ from object_detection.protos import losses_pb2
 from object_detection.protos import model_pb2
 from object_detection.utils import label_map_util
 from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import tf_version

 ## Feature Extractors for TF
@@ -48,6 +49,7 @@ from object_detection.utils import tf_version
 # pylint: disable=g-import-not-at-top
 if tf_version.is_tf2():
   from object_detection.models import center_net_hourglass_feature_extractor
+  from object_detection.models import center_net_mobilenet_v2_feature_extractor
   from object_detection.models import center_net_resnet_feature_extractor
   from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
   from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
@@ -140,11 +142,18 @@ if tf_version.is_tf2():
   CENTER_NET_EXTRACTOR_FUNCTION_MAP = {
       'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50,
       'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
+      'resnet_v1_18_fpn':
+          center_net_resnet_v1_fpn_feature_extractor.resnet_v1_18_fpn,
+      'resnet_v1_34_fpn':
+          center_net_resnet_v1_fpn_feature_extractor.resnet_v1_34_fpn,
       'resnet_v1_50_fpn':
           center_net_resnet_v1_fpn_feature_extractor.resnet_v1_50_fpn,
       'resnet_v1_101_fpn':
           center_net_resnet_v1_fpn_feature_extractor.resnet_v1_101_fpn,
-      'hourglass_104': center_net_hourglass_feature_extractor.hourglass_104,
+      'hourglass_104':
+          center_net_hourglass_feature_extractor.hourglass_104,
+      'mobilenet_v2':
+          center_net_mobilenet_v2_feature_extractor.mobilenet_v2,
   }

   FEATURE_EXTRACTOR_MAPS = [
@@ -648,8 +657,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
       second_stage_localization_loss_weight)

   crop_and_resize_fn = (
-      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
-      else ops.native_crop_and_resize)
+      spatial_ops.multilevel_matmul_crop_and_resize
+      if frcnn_config.use_matmul_crop_and_resize
+      else spatial_ops.multilevel_native_crop_and_resize)
   clip_anchors_to_image = (
       frcnn_config.clip_anchors_to_image)
......
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "inference_from_saved_model_tf2_colab.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "cT5cdSLPX0ui"
},
"source": [
"# Intro to Object Detection Colab\n",
"\n",
"Welcome to the object detection colab! This demo will take you through the steps of running an \"out-of-the-box\" detection model in SavedModel format on a collection of images.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "vPs64QA1Zdov"
},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"metadata": {
"id": "OBzb04bdNGM8",
"colab_type": "code",
"colab": {}
},
"source": [
"!pip install -U --pre tensorflow==\"2.2.0\""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "NgSXyvKSNHIl",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"import pathlib\n",
"\n",
"# Clone the tensorflow models repository if it doesn't already exist\n",
"if \"models\" in pathlib.Path.cwd().parts:\n",
" while \"models\" in pathlib.Path.cwd().parts:\n",
" os.chdir('..')\n",
"elif not pathlib.Path('models').exists():\n",
" !git clone --depth 1 https://github.com/tensorflow/models"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rhpPgW7TNLs6",
"colab_type": "code",
"colab": {}
},
"source": [
"# Install the Object Detection API\n",
"%%bash\n",
"cd models/research/\n",
"protoc object_detection/protos/*.proto --python_out=.\n",
"cp object_detection/packages/tf2/setup.py .\n",
"python -m pip install ."
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "yn5_uV1HLvaz",
"colab": {}
},
"source": [
"import io\n",
"import os\n",
"import scipy.misc\n",
"import numpy as np\n",
"import six\n",
"import time\n",
"\n",
"from six import BytesIO\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from PIL import Image, ImageDraw, ImageFont\n",
"\n",
"import tensorflow as tf\n",
"from object_detection.utils import visualization_utils as viz_utils\n",
"\n",
"%matplotlib inline"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "-y9R0Xllefec",
"colab": {}
},
"source": [
"def load_image_into_numpy_array(path):\n",
" \"\"\"Load an image from file into a numpy array.\n",
"\n",
" Puts image into numpy array to feed into tensorflow graph.\n",
" Note that by convention we put it into a numpy array with shape\n",
" (height, width, channels), where channels=3 for RGB.\n",
"\n",
" Args:\n",
" path: a file path (this can be local or on colossus)\n",
"\n",
" Returns:\n",
" uint8 numpy array with shape (img_height, img_width, 3)\n",
" \"\"\"\n",
" img_data = tf.io.gfile.GFile(path, 'rb').read()\n",
" image = Image.open(BytesIO(img_data))\n",
" (im_width, im_height) = image.size\n",
" return np.array(image.getdata()).reshape(\n",
" (im_height, im_width, 3)).astype(np.uint8)\n",
"\n",
"# Load the COCO Label Map\n",
"category_index = {\n",
" 1: {'id': 1, 'name': 'person'},\n",
" 2: {'id': 2, 'name': 'bicycle'},\n",
" 3: {'id': 3, 'name': 'car'},\n",
" 4: {'id': 4, 'name': 'motorcycle'},\n",
" 5: {'id': 5, 'name': 'airplane'},\n",
" 6: {'id': 6, 'name': 'bus'},\n",
" 7: {'id': 7, 'name': 'train'},\n",
" 8: {'id': 8, 'name': 'truck'},\n",
" 9: {'id': 9, 'name': 'boat'},\n",
" 10: {'id': 10, 'name': 'traffic light'},\n",
" 11: {'id': 11, 'name': 'fire hydrant'},\n",
" 13: {'id': 13, 'name': 'stop sign'},\n",
" 14: {'id': 14, 'name': 'parking meter'},\n",
" 15: {'id': 15, 'name': 'bench'},\n",
" 16: {'id': 16, 'name': 'bird'},\n",
" 17: {'id': 17, 'name': 'cat'},\n",
" 18: {'id': 18, 'name': 'dog'},\n",
" 19: {'id': 19, 'name': 'horse'},\n",
" 20: {'id': 20, 'name': 'sheep'},\n",
" 21: {'id': 21, 'name': 'cow'},\n",
" 22: {'id': 22, 'name': 'elephant'},\n",
" 23: {'id': 23, 'name': 'bear'},\n",
" 24: {'id': 24, 'name': 'zebra'},\n",
" 25: {'id': 25, 'name': 'giraffe'},\n",
" 27: {'id': 27, 'name': 'backpack'},\n",
" 28: {'id': 28, 'name': 'umbrella'},\n",
" 31: {'id': 31, 'name': 'handbag'},\n",
" 32: {'id': 32, 'name': 'tie'},\n",
" 33: {'id': 33, 'name': 'suitcase'},\n",
" 34: {'id': 34, 'name': 'frisbee'},\n",
" 35: {'id': 35, 'name': 'skis'},\n",
" 36: {'id': 36, 'name': 'snowboard'},\n",
" 37: {'id': 37, 'name': 'sports ball'},\n",
" 38: {'id': 38, 'name': 'kite'},\n",
" 39: {'id': 39, 'name': 'baseball bat'},\n",
" 40: {'id': 40, 'name': 'baseball glove'},\n",
" 41: {'id': 41, 'name': 'skateboard'},\n",
" 42: {'id': 42, 'name': 'surfboard'},\n",
" 43: {'id': 43, 'name': 'tennis racket'},\n",
" 44: {'id': 44, 'name': 'bottle'},\n",
" 46: {'id': 46, 'name': 'wine glass'},\n",
" 47: {'id': 47, 'name': 'cup'},\n",
" 48: {'id': 48, 'name': 'fork'},\n",
" 49: {'id': 49, 'name': 'knife'},\n",
" 50: {'id': 50, 'name': 'spoon'},\n",
" 51: {'id': 51, 'name': 'bowl'},\n",
" 52: {'id': 52, 'name': 'banana'},\n",
" 53: {'id': 53, 'name': 'apple'},\n",
" 54: {'id': 54, 'name': 'sandwich'},\n",
" 55: {'id': 55, 'name': 'orange'},\n",
" 56: {'id': 56, 'name': 'broccoli'},\n",
" 57: {'id': 57, 'name': 'carrot'},\n",
" 58: {'id': 58, 'name': 'hot dog'},\n",
" 59: {'id': 59, 'name': 'pizza'},\n",
" 60: {'id': 60, 'name': 'donut'},\n",
" 61: {'id': 61, 'name': 'cake'},\n",
" 62: {'id': 62, 'name': 'chair'},\n",
" 63: {'id': 63, 'name': 'couch'},\n",
" 64: {'id': 64, 'name': 'potted plant'},\n",
" 65: {'id': 65, 'name': 'bed'},\n",
" 67: {'id': 67, 'name': 'dining table'},\n",
" 70: {'id': 70, 'name': 'toilet'},\n",
" 72: {'id': 72, 'name': 'tv'},\n",
" 73: {'id': 73, 'name': 'laptop'},\n",
" 74: {'id': 74, 'name': 'mouse'},\n",
" 75: {'id': 75, 'name': 'remote'},\n",
" 76: {'id': 76, 'name': 'keyboard'},\n",
" 77: {'id': 77, 'name': 'cell phone'},\n",
" 78: {'id': 78, 'name': 'microwave'},\n",
" 79: {'id': 79, 'name': 'oven'},\n",
" 80: {'id': 80, 'name': 'toaster'},\n",
" 81: {'id': 81, 'name': 'sink'},\n",
" 82: {'id': 82, 'name': 'refrigerator'},\n",
" 84: {'id': 84, 'name': 'book'},\n",
" 85: {'id': 85, 'name': 'clock'},\n",
" 86: {'id': 86, 'name': 'vase'},\n",
" 87: {'id': 87, 'name': 'scissors'},\n",
" 88: {'id': 88, 'name': 'teddy bear'},\n",
" 89: {'id': 89, 'name': 'hair drier'},\n",
" 90: {'id': 90, 'name': 'toothbrush'},\n",
"}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "QwcBC2TlPSwg",
"colab_type": "code",
"colab": {}
},
"source": [
"# Download the saved model and put it into models/research/object_detection/test_data/\n",
"!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d5_coco17_tpu-32.tar.gz\n",
"!tar -xf efficientdet_d5_coco17_tpu-32.tar.gz\n",
"!mv efficientdet_d5_coco17_tpu-32/ models/research/object_detection/test_data/"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "Z2p-PmKLYCVU",
"colab": {}
},
"source": [
"start_time = time.time()\n",
"tf.keras.backend.clear_session()\n",
"detect_fn = tf.saved_model.load('models/research/object_detection/test_data/efficientdet_d5_coco17_tpu-32/saved_model/')\n",
"end_time = time.time()\n",
"elapsed_time = end_time - start_time\n",
"print('Elapsed time: ' + str(elapsed_time) + 's')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "vukkhd5-9NSL",
"colab": {}
},
"source": [
"import time\n",
"\n",
"image_dir = 'models/research/object_detection/test_images'\n",
"\n",
"elapsed = []\n",
"for i in range(2):\n",
" image_path = os.path.join(image_dir, 'image' + str(i + 1) + '.jpg')\n",
" image_np = load_image_into_numpy_array(image_path)\n",
" input_tensor = np.expand_dims(image_np, 0)\n",
" start_time = time.time()\n",
" detections = detect_fn(input_tensor)\n",
" end_time = time.time()\n",
" elapsed.append(end_time - start_time)\n",
"\n",
" plt.rcParams['figure.figsize'] = [42, 21]\n",
" label_id_offset = 1\n",
" image_np_with_detections = image_np.copy()\n",
" viz_utils.visualize_boxes_and_labels_on_image_array(\n",
" image_np_with_detections,\n",
" detections['detection_boxes'][0].numpy(),\n",
" detections['detection_classes'][0].numpy().astype(np.int32),\n",
" detections['detection_scores'][0].numpy(),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" max_boxes_to_draw=200,\n",
" min_score_thresh=.40,\n",
" agnostic_mode=False)\n",
" plt.subplot(2, 1, i+1)\n",
" plt.imshow(image_np_with_detections)\n",
"\n",
"mean_elapsed = sum(elapsed) / float(len(elapsed))\n",
"print('Elapsed time: ' + str(mean_elapsed) + ' second per image')"
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
@@ -1600,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
     return (batch_indices, batch_offsets, batch_weights)

+def _resize_masks(masks, height, width, method):
+  # Resize segmentation masks to conform to output dimensions. Use TF2
+  # image resize because TF1's version is buggy:
+  # https://yaqs.corp.google.com/eng/q/4970450458378240
+  masks = tf2.image.resize(
+      masks[:, :, :, tf.newaxis],
+      size=(height, width),
+      method=method)
+  return masks[:, :, :, 0]
 class CenterNetMaskTargetAssigner(object):
   """Wrapper to compute targets for segmentation masks."""
@@ -1641,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
     segmentation_targets_list = []
     for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
-      # Resize segmentation masks to conform to output dimensions. Use TF2
-      # image resize because TF1's version is buggy:
-      # https://yaqs.corp.google.com/eng/q/4970450458378240
-      gt_masks = tf2.image.resize(
-          gt_masks[:, :, :, tf.newaxis],
-          size=(output_height, output_width),
-          method=mask_resize_method)
+      gt_masks = _resize_masks(gt_masks, output_height, output_width,
+                               mask_resize_method)
+      gt_masks = gt_masks[:, :, :, tf.newaxis]
       gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
       # Shape: [h, w, num_classes].
       segmentations_for_image = tf.reduce_max(
@@ -1771,3 +1778,120 @@ class CenterNetDensePoseTargetAssigner(object):
     batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
     batch_weights = tf.concat(batch_weights, axis=0)
     return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
def filter_mask_overlap_min_area(masks):
"""If a pixel belongs to 2 instances, remove it from the larger instance."""
num_instances = tf.shape(masks)[0]
def _filter_min_area():
"""Helper function to filter non empty masks."""
areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
per_pixel_area = masks * areas
# Make sure background is ignored in argmin.
per_pixel_area = (masks * per_pixel_area +
(1 - masks) * per_pixel_area.dtype.max)
min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)
filtered_masks = (
tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
==
min_index[tf.newaxis, :, :]
)
return tf.cast(filtered_masks, tf.float32) * masks
return tf.cond(num_instances > 0, _filter_min_area,
lambda: masks)
def filter_mask_overlap(masks, method='min_area'):
if method == 'min_area':
return filter_mask_overlap_min_area(masks)
else:
raise ValueError('Unknown mask overlap filter type - {}'.format(method))
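`filter_mask_overlap_min_area` resolves contested pixels by handing them to the instance with the smallest area, which keeps small objects from being swallowed by larger overlapping ones. A NumPy sketch of the same rule on a toy 4x4 example (illustrative only, mirroring the unit test further down):

```python
import numpy as np

# Two overlapping instance masks on a 4x4 grid; instance 0 is smaller.
masks = np.zeros((2, 4, 4), dtype=np.float32)
masks[0, :2, :2] = 1.0   # area 4
masks[1, :3, :3] = 1.0   # area 9, overlaps the top-left 2x2 block

areas = masks.sum(axis=(1, 2), keepdims=True)
per_pixel_area = masks * areas
# Ignore background by pushing empty pixels to +inf before the argmin.
per_pixel_area[masks == 0] = np.inf
owner = per_pixel_area.argmin(axis=0)

filtered = (np.arange(masks.shape[0])[:, None, None] == owner) * masks
print(filtered[1])  # the overlapping 2x2 block now belongs only to instance 0
```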
class CenterNetCornerOffsetTargetAssigner(object):
"""Wrapper to compute corner offsets for boxes using masks."""
def __init__(self, stride, overlap_resolution='min_area'):
"""Initializes the corner offset target assigner.
Args:
stride: int, the stride of the network in output pixels.
overlap_resolution: string, specifies how we handle overlapping
instance masks. Currently only 'min_area' is supported which assigns
overlapping pixels to the instance with the minimum area.
"""
self._stride = stride
self._overlap_resolution = overlap_resolution
def assign_corner_offset_targets(
self, gt_boxes_list, gt_masks_list):
"""Computes the corner offset targets and foreground map.
For each pixel that is part of any object's foreground, this function
computes the relative offsets to the top-left and bottom-right corners of
that instance's bounding box. It also returns a foreground map to indicate
which pixels contain valid corner offsets.
Args:
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_masks_list: A list of float tensors with shape [num_boxes,
input_height, input_width] with values in {0, 1} representing instance
masks for each object.
Returns:
corner_offsets: A float tensor of shape [batch_size, height, width, 4]
containing, in order, the (y, x) offsets to the top left corner and
the (y, x) offsets to the bottom right corner for each foreground pixel.
foreground: A float tensor of shape [batch_size, height, width] in which
each pixel is set to 1 if it is a part of any instance's foreground
(and thus contains valid corner offsets) and 0 otherwise.
"""
_, input_height, input_width = (
shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
output_height = input_height // self._stride
output_width = input_width // self._stride
y_grid, x_grid = tf.meshgrid(
tf.range(output_height), tf.range(output_width),
indexing='ij')
y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)
corner_targets = []
foreground_targets = []
for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
gt_masks = _resize_masks(gt_masks, output_height, output_width,
method=ResizeMethod.NEAREST_NEIGHBOR)
gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)
ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
ymin, ymax = ymin * output_height, ymax * output_height
xmin, xmax = xmin * output_width, xmax * output_width
top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
tf.float32)
foreground_targets.append(foreground_target)
corner_target = tf.stack([
tf.reduce_sum(top_y * gt_masks, axis=0),
tf.reduce_sum(left_x * gt_masks, axis=0),
tf.reduce_sum(bottom_y * gt_masks, axis=0),
tf.reduce_sum(right_x * gt_masks, axis=0),
], axis=2)
corner_targets.append(corner_target)
return (tf.stack(corner_targets, axis=0),
tf.stack(foreground_targets, axis=0))
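In other words, for every pixel covered by an instance mask the assigner records the (y, x) distances to that instance's top-left and bottom-right box corners, and the foreground map marks where those offsets are valid. A minimal NumPy sketch of that computation for one box and one mask at stride 1 (hypothetical values, matching the single-object unit test below):

```python
import numpy as np

out_h, out_w = 4, 4
y_grid, x_grid = np.meshgrid(np.arange(out_h, dtype=np.float32),
                             np.arange(out_w, dtype=np.float32), indexing='ij')

# One box covering the whole image (normalized coords), scaled to output pixels.
ymin, xmin, ymax, xmax = 0.0 * out_h, 0.0 * out_w, 1.0 * out_h, 1.0 * out_w

# One instance mask occupying the central 2x2 block.
mask = np.zeros((out_h, out_w), dtype=np.float32)
mask[1:3, 1:3] = 1.0

corner_offsets = np.stack([
    (ymin - y_grid) * mask,   # offset to the top edge
    (xmin - x_grid) * mask,   # offset to the left edge
    (ymax - y_grid) * mask,   # offset to the bottom edge
    (xmax - x_grid) * mask,   # offset to the right edge
], axis=-1)

print(corner_offsets[1, 1])  # [-1. -1.  3.  3.] for the foreground pixel at (1, 1)
```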
@@ -1999,6 +1999,181 @@ class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
     self.assertAllClose(expected_batch_weights, batch_weights)
class CornerOffsetTargetAssignerTest(test_case.TestCase):
def test_filter_overlap_min_area_empty(self):
"""Test that empty masks work on CPU."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = self.execute_cpu(graph_fn, [np.zeros((0, 5, 5), dtype=np.float32)])
self.assertEqual(masks.shape, (0, 5, 5))
def test_filter_overlap_min_area(self):
"""Test the object with min. area is selected instead of overlap."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = np.zeros((3, 4, 4), dtype=np.float32)
masks[0, :2, :2] = 1.0
masks[1, :3, :3] = 1.0
masks[2, 3, 3] = 1.0
masks = self.execute(graph_fn, [masks])
self.assertAllClose(masks[0],
[[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[1],
[[0, 0, 1, 0],
[0, 0, 1, 0],
[1, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[2],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
def test_assign_corner_offset_single_object(self):
"""Test that corner offsets are correct with a single object."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.]])
]
mask = np.zeros((1, 4, 4), dtype=np.float32)
mask[0, 1:3, 1:3] = 1.0
masks = [tf.constant(mask)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertAllClose(foreground[0],
[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, -1, -1, 0],
[0, -2, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, -1, -2, 0],
[0, -1, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[0, 0, 0, 0],
[0, 3, 3, 0],
[0, 2, 2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[0, 0, 0, 0],
[0, 3, 2, 0],
[0, 3, 2, 0],
[0, 0, 0, 0]])
def test_assign_corner_offset_multiple_objects(self):
"""Test corner offsets are correct with multiple objects."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
]
mask1 = np.zeros((2, 4, 4), dtype=np.float32)
mask1[0, 0, 0] = 1.0
mask1[0, 3, 3] = 1.0
mask2 = np.zeros((2, 4, 4), dtype=np.float32)
mask2[0, :2, :2] = 1.0
mask2[1, 1:, 1:] = 1.0
masks = [tf.constant(mask1), tf.constant(mask2)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
self.assertEqual(foreground.shape, (2, 4, 4))
self.assertAllClose(foreground[0],
[[1, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(foreground[1],
[[1, 1, 0, 0],
[1, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 0],
[[0, 0, 0, 0],
[-1, -1, 0, 0],
[0, -1, -1, -1],
[0, -2, -2, -2]])
self.assertAllClose(corner_offsets[1, :, :, 1],
[[0, -1, 0, 0],
[0, -1, -1, -2],
[0, 0, -1, -2],
[0, 0, -1, -2]])
self.assertAllClose(corner_offsets[1, :, :, 2],
[[1, 1, 0, 0],
[0, 0, 3, 3],
[0, 2, 2, 2],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 3],
[[1, 0, 0, 0],
[1, 0, 2, 1],
[0, 3, 2, 1],
[0, 3, 2, 1]])
def test_assign_corner_offsets_no_objects(self):
"""Test assignment works with empty input on cpu."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.zeros((0, 4), dtype=tf.float32)
]
masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute_cpu(graph_fn, [])
self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
self.assertAllClose(foreground, np.zeros((1, 5, 5)))
 if __name__ == '__main__':
   tf.enable_v2_behavior()
   tf.test.main()
@@ -40,6 +40,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### June 26th, 2020
We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP
higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson
Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### June 17th, 2020

We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
......
@@ -1094,8 +1094,12 @@ def get_reduce_to_frame_fn(input_reader_config, is_training):
       num_frames = tf.cast(
           tf.shape(tensor_dict[fields.InputDataFields.source_id])[0],
           dtype=tf.int32)
-      frame_index = tf.random.uniform((), minval=0, maxval=num_frames,
-                                      dtype=tf.int32)
+      if input_reader_config.frame_index == -1:
+        frame_index = tf.random.uniform((), minval=0, maxval=num_frames,
+                                        dtype=tf.int32)
+      else:
+        frame_index = tf.constant(input_reader_config.frame_index,
+                                  dtype=tf.int32)
       out_tensor_dict = {}
       for key in tensor_dict:
         if key in fields.SEQUENCE_FIELDS:
......
@@ -61,7 +61,7 @@ def _get_configs_for_model(model_name):
       configs, kwargs_dict=override_dict)

-def _get_configs_for_model_sequence_example(model_name):
+def _get_configs_for_model_sequence_example(model_name, frame_index=-1):
   """Returns configurations for model."""
   fname = os.path.join(tf.resource_loader.get_data_files_path(),
                        'test_data/' + model_name + '.config')
@@ -74,7 +74,8 @@ def _get_configs_for_model_sequence_example(model_name):
   override_dict = {
       'train_input_path': data_path,
       'eval_input_path': data_path,
-      'label_map_path': label_map_path
+      'label_map_path': label_map_path,
+      'frame_index': frame_index
   }
   return config_util.merge_external_params_with_configs(
       configs, kwargs_dict=override_dict)
@@ -312,6 +313,46 @@ class InputFnTest(test_case.TestCase, parameterized.TestCase):
         tf.float32,
         labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_context_rcnn_resnet50_train_input_with_sequence_example_frame_index(
self, train_batch_size=8):
"""Tests the training input function for FasterRcnnResnet50."""
configs = _get_configs_for_model_sequence_example(
'context_rcnn_camera_trap', frame_index=2)
model_config = configs['model']
train_config = configs['train_config']
train_config.batch_size = train_batch_size
train_input_fn = inputs.create_train_input_fn(
train_config, configs['train_input_config'], model_config)
features, labels = _make_initializable_iterator(train_input_fn()).get_next()
self.assertAllEqual([train_batch_size, 640, 640, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual([train_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[train_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[train_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[train_batch_size, 100],
labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
self.assertAllEqual(
[train_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
   def test_ssd_inceptionV2_train_input(self):
     """Tests the training input function for SSDInceptionV2."""
     configs = _get_configs_for_model('ssd_inception_v2_pets')
......
@@ -332,7 +332,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
     """
     box_features = self._crop_and_resize_fn(
-        features_to_crop, proposal_boxes_normalized,
+        [features_to_crop], proposal_boxes_normalized, None,
         [self._initial_crop_size, self._initial_crop_size])
     attention_features = self._context_feature_extract_fn(
......
@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -362,8 +362,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)

     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
......
@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
+    features_to_crop = [features_to_crop]
+    num_levels = len(features_to_crop)
+    box_levels = None
+    if num_levels != 1:
+      # If there are multiple levels to select, get the box levels
+      box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
+                                          1.0/224, proposal_boxes_normalized)
     cropped_regions = self._flatten_first_two_dimensions(
         self._crop_and_resize_fn(
-            features_to_crop, proposal_boxes_normalized,
+            features_to_crop, proposal_boxes_normalized, box_levels,
             [self._initial_crop_size, self._initial_crop_size]))
     return self._maxpool_layer(cropped_regions)
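The new multilevel path must decide which feature level each proposal is pooled from; with a single level the `box_levels` argument simply stays `None`. Purely for orientation, a hedged sketch of the standard FPN level-assignment heuristic that `ops.fpn_feature_levels` follows in spirit (the library's exact clipping and scale handling may differ, so treat this as an illustration, not its implementation):

```python
import numpy as np

def assign_fpn_level(box_height, box_width, min_level=2, max_level=5,
                     canonical_scale=224.0, canonical_level=4):
  """FPN paper heuristic: k = floor(k0 + log2(sqrt(w * h) / 224))."""
  scale = np.sqrt(box_height * box_width)
  level = np.floor(canonical_level + np.log2(scale / canonical_scale))
  return int(np.clip(level, min_level, max_level))

print(assign_fpn_level(224.0, 224.0))  # -> 4, the canonical box size
print(assign_fpn_level(56.0, 56.0))    # -> 2, small boxes pool from finer levels
```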
@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
         image_shape[1], image_shape[2], check_range=False).get()
     flat_cropped_gt_mask = self._crop_and_resize_fn(
-        tf.expand_dims(flat_gt_masks, -1),
-        tf.expand_dims(flat_normalized_proposals, axis=1),
+        [tf.expand_dims(flat_gt_masks, -1)],
+        tf.expand_dims(flat_normalized_proposals, axis=1), None,
         [mask_height, mask_width])
     # Without stopping gradients into cropped groundtruth masks the
     # performance with 100-padded groundtruth masks when batch size > 1 is
......
@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)

     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
......
@@ -414,7 +414,7 @@ def train_loop(
     train_steps=None,
     use_tpu=False,
     save_final_config=False,
-    checkpoint_every_n=1000,
+    checkpoint_every_n=5000,
     checkpoint_max_to_keep=7,
     **kwargs):
   """Trains a model using eager + functions.
@@ -855,6 +855,7 @@ def eval_continuously(
     checkpoint_dir=None,
     wait_interval=180,
     timeout=3600,
+    eval_index=None,
     **kwargs):
   """Run continuous evaluation of a detection model eagerly.
@@ -884,6 +885,8 @@ def eval_continuously(
       new checkpoint.
     timeout: The maximum number of seconds to wait for a checkpoint. Execution
       will terminate if no new checkpoints are found after these many seconds.
+    eval_index: int, optional. If given, only evaluate the dataset at the given
+      index.
     **kwargs: Additional keyword arguments for configuration override.
   """
@@ -937,6 +940,11 @@ def eval_continuously(
         model=detection_model)
     eval_inputs.append((eval_input_config.name, next_eval_input))

+  if eval_index is not None:
+    eval_inputs = [eval_inputs[eval_index]]
+    tf.logging.info('eval_index selected - {}'.format(
+        eval_inputs))
+
   global_step = tf.compat.v2.Variable(
       0, trainable=False, dtype=tf.compat.v2.dtypes.int64)