Move retinanet keras model to tensorflow_models/official

PiperOrigin-RevId: 274010788

Move retinanet keras model to tensorflow_models/official
PiperOrigin-RevId: 274010788
bcb231f0 · Yeqing Li · A. Unique TensorFlower · 04ce9636 · bcb231f0 · bcb231f0
Commit bcb231f0 authored Oct 10, 2019 by Yeqing Li Committed by A. Unique TensorFlower Oct 10, 2019
15 changed files
--- a/official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py
+++ b/official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Class to subsample minibatches by balancing positives and negatives.
+
+Subsamples minibatches based on a pre-specified positive fraction in range
+[0,1]. The class presumes there are many more negatives than positive examples:
+if the desired batch_size cannot be achieved with the pre-specified positive
+fraction, it fills the rest with negative examples. If this is not sufficient
+for obtaining the desired batch_size, it returns fewer examples.
+
+The main function to call is Subsample(self, indicator, labels). For convenience
+one can also call SubsampleWeights(self, weights, labels) which is defined in
+the minibatch_sampler base class.
+
+When is_static is True, it implements a method that guarantees static shapes.
+It also ensures the length of output of the subsample is always batch_size, even
+when number of examples set to True in indicator is less than batch_size.
+
+This is originally implemented in TensorFlow Object Detection API.
+"""
+
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import minibatch_sampler
+from official.vision.detection.utils.object_detection import ops
+
+
+class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
+  """Subsamples minibatches to a desired balance of positives and negatives."""
+
+  def __init__(self, positive_fraction=0.5, is_static=False):
+    """Constructs a minibatch sampler.
+
+    Args:
+      positive_fraction: desired fraction of positive examples (scalar in [0,1])
+        in the batch.
+      is_static: If True, uses an implementation with static shape guarantees.
+
+    Raises:
+      ValueError: if positive_fraction < 0, or positive_fraction > 1
+    """
+    if positive_fraction < 0 or positive_fraction > 1:
+      raise ValueError('positive_fraction should be in range [0,1]. '
+                       'Received: %s.' % positive_fraction)
+    self._positive_fraction = positive_fraction
+    self._is_static = is_static
+
+  def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
+    """Counts the number of positives and negatives numbers to be sampled.
+
+    Args:
+      sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
+        the signed indices of the examples where the sign is based on the label
+        value. The examples that cannot be sampled are set to 0. It samples
+        atmost sample_size*positive_fraction positive examples and remaining
+        from negative examples.
+      sample_size: Size of subsamples.
+
+    Returns:
+      A tuple containing the number of positive and negative labels in the
+      subsample.
+    """
+    input_length = tf.shape(input=sorted_indices_tensor)[0]
+    valid_positive_index = tf.greater(sorted_indices_tensor,
+                                      tf.zeros(input_length, tf.int32))
+    num_sampled_pos = tf.reduce_sum(
+        input_tensor=tf.cast(valid_positive_index, tf.int32))
+    max_num_positive_samples = tf.constant(
+        int(sample_size * self._positive_fraction), tf.int32)
+    num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
+    num_negative_samples = tf.constant(sample_size,
+                                       tf.int32) - num_positive_samples
+
+    return num_positive_samples, num_negative_samples
+
+  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
+                                     num_end_samples, total_num_samples):
+    """slices num_start_samples and last num_end_samples from input_tensor.
+
+    Args:
+      input_tensor: An int32 tensor of shape [N] to be sliced.
+      num_start_samples: Number of examples to be sliced from the beginning
+        of the input tensor.
+      num_end_samples: Number of examples to be sliced from the end of the
+        input tensor.
+      total_num_samples: Sum of is num_start_samples and num_end_samples. This
+        should be a scalar.
+
+    Returns:
+      A tensor containing the first num_start_samples and last num_end_samples
+      from input_tensor.
+
+    """
+    input_length = tf.shape(input=input_tensor)[0]
+    start_positions = tf.less(tf.range(input_length), num_start_samples)
+    end_positions = tf.greater_equal(
+        tf.range(input_length), input_length - num_end_samples)
+    selected_positions = tf.logical_or(start_positions, end_positions)
+    selected_positions = tf.cast(selected_positions, tf.float32)
+    indexed_positions = tf.multiply(tf.cumsum(selected_positions),
+                                    selected_positions)
+    one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
+                                  total_num_samples,
+                                  dtype=tf.float32)
+    return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32),
+                                one_hot_selector, axes=[0, 0]), tf.int32)
+
+  def _static_subsample(self, indicator, batch_size, labels):
+    """Returns subsampled minibatch.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+        N should be a complie time constant.
+      batch_size: desired batch size. This scalar cannot be None.
+      labels: boolean tensor of shape [N] denoting positive(=True) and negative
+        (=False) examples. N should be a complie time constant.
+
+    Returns:
+      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+        are sampled. It ensures the length of output of the subsample is always
+        batch_size, even when number of examples set to True in indicator is
+        less than batch_size.
+
+    Raises:
+      ValueError: if labels and indicator are not 1D boolean tensors.
+    """
+    # Check if indicator and labels have a static size.
+    if not indicator.shape.is_fully_defined():
+      raise ValueError('indicator must be static in shape when is_static is'
+                       'True')
+    if not labels.shape.is_fully_defined():
+      raise ValueError('labels must be static in shape when is_static is'
+                       'True')
+    if not isinstance(batch_size, int):
+      raise ValueError('batch_size has to be an integer when is_static is'
+                       'True.')
+
+    input_length = tf.shape(input=indicator)[0]
+
+    # Set the number of examples set True in indicator to be at least
+    # batch_size.
+    num_true_sampled = tf.reduce_sum(
+        input_tensor=tf.cast(indicator, tf.float32))
+    additional_false_sample = tf.less_equal(
+        tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
+        batch_size - num_true_sampled)
+    indicator = tf.logical_or(indicator, additional_false_sample)
+
+    # Shuffle indicator and label. Need to store the permutation to restore the
+    # order post sampling.
+    permutation = tf.random.shuffle(tf.range(input_length))
+    indicator = ops.matmul_gather_on_zeroth_axis(
+        tf.cast(indicator, tf.float32), permutation)
+    labels = ops.matmul_gather_on_zeroth_axis(
+        tf.cast(labels, tf.float32), permutation)
+
+    # index (starting from 1) when indicator is True, 0 when False
+    indicator_idx = tf.where(
+        tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
+        tf.zeros(input_length, tf.int32))
+
+    # Replace -1 for negative, +1 for positive labels
+    signed_label = tf.where(
+        tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
+        tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
+    # negative of index for negative label, positive index for positive label,
+    # 0 when indicator is False.
+    signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
+    sorted_signed_indicator_idx = tf.nn.top_k(
+        signed_indicator_idx, input_length, sorted=True).values
+
+    [num_positive_samples,
+     num_negative_samples] = self._get_num_pos_neg_samples(
+         sorted_signed_indicator_idx, batch_size)
+
+    sampled_idx = self._get_values_from_start_and_end(
+        sorted_signed_indicator_idx, num_positive_samples,
+        num_negative_samples, batch_size)
+
+    # Shift the indices to start from 0 and remove any samples that are set as
+    # False.
+    sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
+    sampled_idx = tf.multiply(
+        tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
+        sampled_idx)
+
+    sampled_idx_indicator = tf.cast(
+        tf.reduce_sum(
+            input_tensor=tf.one_hot(sampled_idx, depth=input_length), axis=0),
+        tf.bool)
+
+    # project back the order based on stored permutations
+    reprojections = tf.one_hot(permutation, depth=input_length,
+                               dtype=tf.float32)
+    return tf.cast(tf.tensordot(
+        tf.cast(sampled_idx_indicator, tf.float32),
+        reprojections, axes=[0, 0]), tf.bool)
+
+  def subsample(self, indicator, batch_size, labels, scope=None):
+    """Returns subsampled minibatch.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size. If None, keeps all positive samples and
+        randomly selects negative samples so that the positive sample fraction
+        matches self._positive_fraction. It cannot be None is is_static is True.
+      labels: boolean tensor of shape [N] denoting positive(=True) and negative
+          (=False) examples.
+      scope: name scope.
+
+    Returns:
+      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+        are sampled.
+
+    Raises:
+      ValueError: if labels and indicator are not 1D boolean tensors.
+    """
+    if len(indicator.get_shape().as_list()) != 1:
+      raise ValueError('indicator must be 1 dimensional, got a tensor of '
+                       'shape %s' % indicator.get_shape())
+    if len(labels.get_shape().as_list()) != 1:
+      raise ValueError('labels must be 1 dimensional, got a tensor of '
+                       'shape %s' % labels.get_shape())
+    if labels.dtype != tf.bool:
+      raise ValueError('labels should be of type bool. Received: %s' %
+                       labels.dtype)
+    if indicator.dtype != tf.bool:
+      raise ValueError('indicator should be of type bool. Received: %s' %
+                       indicator.dtype)
+    scope = scope or 'BalancedPositiveNegativeSampler'
+    with tf.name_scope(scope):
+      if self._is_static:
+        return self._static_subsample(indicator, batch_size, labels)
+
+      else:
+        # Only sample from indicated samples
+        negative_idx = tf.logical_not(labels)
+        positive_idx = tf.logical_and(labels, indicator)
+        negative_idx = tf.logical_and(negative_idx, indicator)
+
+        # Sample positive and negative samples separately
+        if batch_size is None:
+          max_num_pos = tf.reduce_sum(
+              input_tensor=tf.cast(positive_idx, dtype=tf.int32))
+        else:
+          max_num_pos = int(self._positive_fraction * batch_size)
+        sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
+        num_sampled_pos = tf.reduce_sum(
+            input_tensor=tf.cast(sampled_pos_idx, tf.int32))
+        if batch_size is None:
+          negative_positive_ratio = (
+              1 - self._positive_fraction) / self._positive_fraction
+          max_num_neg = tf.cast(
+              negative_positive_ratio *
+              tf.cast(num_sampled_pos, dtype=tf.float32),
+              dtype=tf.int32)
+        else:
+          max_num_neg = batch_size - num_sampled_pos
+        sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
+
+        return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
--- a/official/vision/detection/utils/object_detection/box_coder.py
+++ b/official/vision/detection/utils/object_detection/box_coder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base box coder.
+
+Box coders convert between coordinate frames, namely image-centric
+(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
+defined by a specific anchor).
+
+Users of a BoxCoder can call two methods:
+ encode: which encodes a box with respect to a given anchor
+  (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
+ decode: which inverts this encoding with a decode operation.
+In both cases, the arguments are assumed to be in 1-1 correspondence already;
+it is not the job of a BoxCoder to perform matching.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+from abc import abstractproperty
+
+import tensorflow.compat.v2 as tf
+
+
+# Box coder types.
+FASTER_RCNN = 'faster_rcnn'
+KEYPOINT = 'keypoint'
+MEAN_STDDEV = 'mean_stddev'
+SQUARE = 'square'
+
+
+class BoxCoder(object):
+  """Abstract base class for box coder."""
+  __metaclass__ = ABCMeta
+
+  @abstractproperty
+  def code_size(self):
+    """Return the size of each code.
+
+    This number is a constant and should agree with the output of the `encode`
+    op (e.g. if rel_codes is the output of self.encode(...), then it should have
+    shape [N, code_size()]).  This abstractproperty should be overridden by
+    implementations.
+
+    Returns:
+      an integer constant
+    """
+    pass
+
+  def encode(self, boxes, anchors):
+    """Encode a box list relative to an anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded
+      anchors: BoxList of N anchors
+
+    Returns:
+      a tensor representing N relative-encoded boxes
+    """
+    with tf.name_scope('Encode'):
+      return self._encode(boxes, anchors)
+
+  def decode(self, rel_codes, anchors):
+    """Decode boxes that are encoded relative to an anchor collection.
+
+    Args:
+      rel_codes: a tensor representing N relative-encoded boxes
+      anchors: BoxList of anchors
+
+    Returns:
+      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+        with corners y_min, x_min, y_max, x_max)
+    """
+    with tf.name_scope('Decode'):
+      return self._decode(rel_codes, anchors)
+
+  @abstractmethod
+  def _encode(self, boxes, anchors):
+    """Method to be overriden by implementations.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded
+      anchors: BoxList of N anchors
+
+    Returns:
+      a tensor representing N relative-encoded boxes
+    """
+    pass
+
+  @abstractmethod
+  def _decode(self, rel_codes, anchors):
+    """Method to be overriden by implementations.
+
+    Args:
+      rel_codes: a tensor representing N relative-encoded boxes
+      anchors: BoxList of anchors
+
+    Returns:
+      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+        with corners y_min, x_min, y_max, x_max)
+    """
+    pass
+
+
+def batch_decode(encoded_boxes, box_coder, anchors):
+  """Decode a batch of encoded boxes.
+
+  This op takes a batch of encoded bounding boxes and transforms
+  them to a batch of bounding boxes specified by their corners in
+  the order of [y_min, x_min, y_max, x_max].
+
+  Args:
+    encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+      code_size] representing the location of the objects.
+    box_coder: a BoxCoder object.
+    anchors: a BoxList of anchors used to encode `encoded_boxes`.
+
+  Returns:
+    decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+      coder_size] representing the corners of the objects in the order
+      of [y_min, x_min, y_max, x_max].
+
+  Raises:
+    ValueError: if batch sizes of the inputs are inconsistent, or if
+    the number of anchors inferred from encoded_boxes and anchors are
+    inconsistent.
+  """
+  encoded_boxes.get_shape().assert_has_rank(3)
+  if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
+    raise ValueError('The number of anchors inferred from encoded_boxes'
+                     ' and anchors are inconsistent: shape[1] of encoded_boxes'
+                     ' %s should be equal to the number of anchors: %s.' %
+                     (encoded_boxes.get_shape()[1].value,
+                      anchors.num_boxes_static()))
+
+  decoded_boxes = tf.stack([
+      box_coder.decode(boxes, anchors).get()
+      for boxes in tf.unstack(encoded_boxes)
+  ])
+  return decoded_boxes
--- a/official/vision/detection/utils/object_detection/box_list.py
+++ b/official/vision/detection/utils/object_detection/box_list.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List definition.
+
+BoxList represents a list of bounding boxes as tensorflow
+tensors, where each bounding box is represented as a row of 4 numbers,
+[y_min, x_min, y_max, x_max].  It is assumed that all bounding boxes
+within a given list correspond to a single image.  See also
+box_list_ops.py for common box related operations (such as area, iou, etc).
+
+Optionally, users can add additional related fields (such as weights).
+We assume the following things to be true about fields:
+* they correspond to boxes in the box_list along the 0th dimension
+* they have inferrable rank at graph construction time
+* all dimensions except for possibly the 0th can be inferred
+  (i.e., not None) at graph construction time.
+
+Some other notes:
+  * Following tensorflow conventions, we use height, width ordering,
+  and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
+  * Tensors are always provided as (flat) [N, 4] tensors.
+"""
+
+import tensorflow.compat.v2 as tf
+
+
+class BoxList(object):
+  """Box collection."""
+
+  def __init__(self, boxes):
+    """Constructs box collection.
+
+    Args:
+      boxes: a tensor of shape [N, 4] representing box corners
+
+    Raises:
+      ValueError: if invalid dimensions for bbox data or if bbox data is not in
+          float32 format.
+    """
+    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    if boxes.dtype != tf.float32:
+      raise ValueError('Invalid tensor type: should be tf.float32')
+    self.data = {'boxes': boxes}
+
+  def num_boxes(self):
+    """Returns number of boxes held in collection.
+
+    Returns:
+      a tensor representing the number of boxes held in the collection.
+    """
+    return tf.shape(input=self.data['boxes'])[0]
+
+  def num_boxes_static(self):
+    """Returns number of boxes held in collection.
+
+    This number is inferred at graph construction time rather than run-time.
+
+    Returns:
+      Number of boxes held in collection (integer) or None if this is not
+        inferrable at graph construction time.
+    """
+    return self.data['boxes'].get_shape().dims[0].value
+
+  def get_all_fields(self):
+    """Returns all fields."""
+    return self.data.keys()
+
+  def get_extra_fields(self):
+    """Returns all non-box fields (i.e., everything not named 'boxes')."""
+    return [k for k in self.data.keys() if k != 'boxes']
+
+  def add_field(self, field, field_data):
+    """Add field to box list.
+
+    This method can be used to add related box data such as
+    weights/labels, etc.
+
+    Args:
+      field: a string key to access the data via `get`
+      field_data: a tensor containing the data to store in the BoxList
+    """
+    self.data[field] = field_data
+
+  def has_field(self, field):
+    return field in self.data
+
+  def get(self):
+    """Convenience function for accessing box coordinates.
+
+    Returns:
+      a tensor with shape [N, 4] representing box coordinates.
+    """
+    return self.get_field('boxes')
+
+  def set(self, boxes):
+    """Convenience function for setting box coordinates.
+
+    Args:
+      boxes: a tensor of shape [N, 4] representing box corners
+
+    Raises:
+      ValueError: if invalid dimensions for bbox data
+    """
+    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    self.data['boxes'] = boxes
+
+  def get_field(self, field):
+    """Accesses a box collection and associated fields.
+
+    This function returns specified field with object; if no field is specified,
+    it returns the box coordinates.
+
+    Args:
+      field: this optional string parameter can be used to specify
+        a related field to be accessed.
+
+    Returns:
+      a tensor representing the box collection or an associated field.
+
+    Raises:
+      ValueError: if invalid field
+    """
+    if not self.has_field(field):
+      raise ValueError('field ' + str(field) + ' does not exist')
+    return self.data[field]
+
+  def set_field(self, field, value):
+    """Sets the value of a field.
+
+    Updates the field of a box_list with a given value.
+
+    Args:
+      field: (string) name of the field to set value.
+      value: the value to assign to the field.
+
+    Raises:
+      ValueError: if the box_list does not have specified field.
+    """
+    if not self.has_field(field):
+      raise ValueError('field %s does not exist' % field)
+    self.data[field] = value
+
+  def get_center_coordinates_and_sizes(self, scope=None):
+    """Computes the center coordinates, height and width of the boxes.
+
+    Args:
+      scope: name scope of the function.
+
+    Returns:
+      a list of 4 1-D tensors [ycenter, xcenter, height, width].
+    """
+    if not scope:
+      scope = 'get_center_coordinates_and_sizes'
+    with tf.name_scope(scope):
+      box_corners = self.get()
+      ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(a=box_corners))
+      width = xmax - xmin
+      height = ymax - ymin
+      ycenter = ymin + height / 2.
+      xcenter = xmin + width / 2.
+      return [ycenter, xcenter, height, width]
+
+  def transpose_coordinates(self, scope=None):
+    """Transpose the coordinate representation in a boxlist.
+
+    Args:
+      scope: name scope of the function.
+    """
+    if not scope:
+      scope = 'transpose_coordinates'
+    with tf.name_scope(scope):
+      y_min, x_min, y_max, x_max = tf.split(
+          value=self.get(), num_or_size_splits=4, axis=1)
+      self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
+
+  def as_tensor_dict(self, fields=None):
+    """Retrieves specified fields as a dictionary of tensors.
+
+    Args:
+      fields: (optional) list of fields to return in the dictionary.
+        If None (default), all fields are returned.
+
+    Returns:
+      tensor_dict: A dictionary of tensors specified by fields.
+
+    Raises:
+      ValueError: if specified field is not contained in boxlist.
+    """
+    tensor_dict = {}
+    if fields is None:
+      fields = self.get_all_fields()
+    for field in fields:
+      if not self.has_field(field):
+        raise ValueError('boxlist must contain all specified fields')
+      tensor_dict[field] = self.get_field(field)
+    return tensor_dict
--- a/official/vision/detection/utils/object_detection/box_list_ops.py
+++ b/official/vision/detection/utils/object_detection/box_list_ops.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List operations.
+
+Example box operations that are supported:
+  * areas: compute bounding box areas
+  * iou: pairwise intersection-over-union scores
+  * sq_dist: pairwise distances between bounding boxes
+
+Whenever box_list_ops functions output a BoxList, the fields of the incoming
+BoxList are retained unless documented otherwise.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from six.moves import range
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import box_list
+from official.vision.detection.utils.object_detection import ops
+
+
+class SortOrder(object):
+  """Enum class for sort order.
+
+  Attributes:
+    ascend: ascend order.
+    descend: descend order.
+  """
+  ascend = 1
+  descend = 2
+
+
+def area(boxlist, scope=None):
+  """Computes area of boxes.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] representing box areas.
+  """
+  with tf.name_scope(scope, 'Area'):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
+
+
+def height_width(boxlist, scope=None):
+  """Computes height and width of boxes in boxlist.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    Height: A tensor with shape [N] representing box heights.
+    Width: A tensor with shape [N] representing box widths.
+  """
+  with tf.name_scope(scope, 'HeightWidth'):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
+
+
+def scale(boxlist, y_scale, x_scale, scope=None):
+  """scale box coordinates in x and y dimensions.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    boxlist: BoxList holding N boxes
+  """
+  with tf.name_scope(scope, 'Scale'):
+    y_scale = tf.cast(y_scale, tf.float32)
+    x_scale = tf.cast(x_scale, tf.float32)
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    y_min = y_scale * y_min
+    y_max = y_scale * y_max
+    x_min = x_scale * x_min
+    x_max = x_scale * x_max
+    scaled_boxlist = box_list.BoxList(
+        tf.concat([y_min, x_min, y_max, x_max], 1))
+    return _copy_extra_fields(scaled_boxlist, boxlist)
+
+
+def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
+  """Clip bounding boxes to a window.
+
+  This op clips any input bounding boxes (represented by bounding box
+  corners) to a window, optionally filtering out boxes that do not
+  overlap at all with the window.
+
+  Args:
+    boxlist: BoxList holding M_in boxes
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window to which the op should clip boxes.
+    filter_nonoverlapping: whether to filter out boxes that do not overlap at
+      all with the window.
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M_out boxes where M_out <= M_in
+  """
+  with tf.name_scope(scope, 'ClipToWindow'):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+    y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
+    y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
+    x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
+    x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
+    clipped = box_list.BoxList(
+        tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
+                  1))
+    clipped = _copy_extra_fields(clipped, boxlist)
+    if filter_nonoverlapping:
+      areas = area(clipped)
+      nonzero_area_indices = tf.cast(
+          tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
+      clipped = gather(clipped, nonzero_area_indices)
+    return clipped
+
+
+def prune_outside_window(boxlist, window, scope=None):
+  """Prunes bounding boxes that fall outside a given window.
+
+  This function prunes bounding boxes that even partially fall outside the given
+  window. See also clip_to_window which only prunes bounding boxes that fall
+  completely outside the window, and clips any bounding boxes that partially
+  overflow.
+
+  Args:
+    boxlist: a BoxList holding M_in boxes.
+    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
+      of the window
+    scope: name scope.
+
+  Returns:
+    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
+    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
+     in the input tensor.
+  """
+  with tf.name_scope(scope, 'PruneOutsideWindow'):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+    coordinate_violations = tf.concat([
+        tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
+        tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
+    ], 1)
+    valid_indices = tf.reshape(
+        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
+    return gather(boxlist, valid_indices), valid_indices
+
+
+def prune_completely_outside_window(boxlist, window, scope=None):
+  """Prunes bounding boxes that fall completely outside of the given window.
+
+  The function clip_to_window prunes bounding boxes that fall
+  completely outside the window, but also clips any bounding boxes that
+  partially overflow. This function does not clip partially overflowing boxes.
+
+  Args:
+    boxlist: a BoxList holding M_in boxes.
+    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
+      of the window
+    scope: name scope.
+
+  Returns:
+    pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
+      the window.
+    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
+     in the input tensor.
+  """
+  with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+    coordinate_violations = tf.concat([
+        tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
+        tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
+    ], 1)
+    valid_indices = tf.reshape(
+        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
+    return gather(boxlist, valid_indices), valid_indices
+
+
+def intersection(boxlist1, boxlist2, scope=None):
+  """Compute pairwise intersection areas between boxes.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise intersections
+  """
+  with tf.name_scope(scope, 'Intersection'):
+    y_min1, x_min1, y_max1, x_max1 = tf.split(
+        value=boxlist1.get(), num_or_size_splits=4, axis=1)
+    y_min2, x_min2, y_max2, x_max2 = tf.split(
+        value=boxlist2.get(), num_or_size_splits=4, axis=1)
+    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
+    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
+    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
+    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
+    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
+    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
+    return intersect_heights * intersect_widths
+
+
+def matched_intersection(boxlist1, boxlist2, scope=None):
+  """Compute intersection areas between corresponding boxes in two boxlists.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] representing pairwise intersections
+  """
+  with tf.name_scope(scope, 'MatchedIntersection'):
+    y_min1, x_min1, y_max1, x_max1 = tf.split(
+        value=boxlist1.get(), num_or_size_splits=4, axis=1)
+    y_min2, x_min2, y_max2, x_max2 = tf.split(
+        value=boxlist2.get(), num_or_size_splits=4, axis=1)
+    min_ymax = tf.minimum(y_max1, y_max2)
+    max_ymin = tf.maximum(y_min1, y_min2)
+    intersect_heights = tf.maximum(0.0, min_ymax - max_ymin)
+    min_xmax = tf.minimum(x_max1, x_max2)
+    max_xmin = tf.maximum(x_min1, x_min2)
+    intersect_widths = tf.maximum(0.0, min_xmax - max_xmin)
+    return tf.reshape(intersect_heights * intersect_widths, [-1])
+
+
+def iou(boxlist1, boxlist2, scope=None):
+  """Computes pairwise intersection-over-union between box collections.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise iou scores.
+  """
+  with tf.name_scope(scope, 'IOU'):
+    intersections = intersection(boxlist1, boxlist2)
+    areas1 = area(boxlist1)
+    areas2 = area(boxlist2)
+    unions = (
+        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
+    return tf.where(
+        tf.equal(intersections, 0.0),
+        tf.zeros_like(intersections), tf.truediv(intersections, unions))
+
+
+def matched_iou(boxlist1, boxlist2, scope=None):
+  """Compute intersection-over-union between corresponding boxes in boxlists.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] representing pairwise iou scores.
+  """
+  with tf.name_scope(scope, 'MatchedIOU'):
+    intersections = matched_intersection(boxlist1, boxlist2)
+    areas1 = area(boxlist1)
+    areas2 = area(boxlist2)
+    unions = areas1 + areas2 - intersections
+    return tf.where(
+        tf.equal(intersections, 0.0),
+        tf.zeros_like(intersections), tf.truediv(intersections, unions))
+
+
+def ioa(boxlist1, boxlist2, scope=None):
+  """Computes pairwise intersection-over-area between box collections.
+
+  intersection-over-area (IOA) between two boxes box1 and box2 is defined as
+  their intersection area over box2's area. Note that ioa is not symmetric,
+  that is, ioa(box1, box2) != ioa(box2, box1).
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise ioa scores.
+  """
+  with tf.name_scope(scope, 'IOA'):
+    intersections = intersection(boxlist1, boxlist2)
+    areas = tf.expand_dims(area(boxlist2), 0)
+    return tf.truediv(intersections, areas)
+
+
+def prune_non_overlapping_boxes(
+    boxlist1, boxlist2, min_overlap=0.0, scope=None):
+  """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
+
+  For each box in boxlist1, we want its IOA to be more than minoverlap with
+  at least one of the boxes in boxlist2. If it does not, we remove it.
+
+  Args:
+    boxlist1: BoxList holding N boxes.
+    boxlist2: BoxList holding M boxes.
+    min_overlap: Minimum required overlap between boxes, to count them as
+                overlapping.
+    scope: name scope.
+
+  Returns:
+    new_boxlist1: A pruned boxlist with size [N', 4].
+    keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
+      first input BoxList `boxlist1`.
+  """
+  with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
+    ioa_ = ioa(boxlist2, boxlist1)  # [M, N] tensor
+    ioa_ = tf.reduce_max(ioa_, reduction_indices=[0])  # [N] tensor
+    keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
+    keep_inds = tf.squeeze(tf.where(keep_bool), axis=[1])
+    new_boxlist1 = gather(boxlist1, keep_inds)
+    return new_boxlist1, keep_inds
+
+
+def prune_small_boxes(boxlist, min_side, scope=None):
+  """Prunes small boxes in the boxlist which have a side smaller than min_side.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    min_side: Minimum width AND height of box to survive pruning.
+    scope: name scope.
+
+  Returns:
+    A pruned boxlist.
+  """
+  with tf.name_scope(scope, 'PruneSmallBoxes'):
+    height, width = height_width(boxlist)
+    is_valid = tf.logical_and(tf.greater_equal(width, min_side),
+                              tf.greater_equal(height, min_side))
+    return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
+
+
+def change_coordinate_frame(boxlist, window, scope=None):
+  """Change coordinate frame of the boxlist to be relative to window's frame.
+
+  Given a window of the form [ymin, xmin, ymax, xmax],
+  changes bounding box coordinates from boxlist to be relative to this window
+  (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
+
+  An example use case is data augmentation: where we are given groundtruth
+  boxes (boxlist) and would like to randomly crop the image to some
+  window (window). In this case we need to change the coordinate frame of
+  each groundtruth box to be relative to this new window.
+
+  Args:
+    boxlist: A BoxList object holding N boxes.
+    window: A rank 1 tensor [4].
+    scope: name scope.
+
+  Returns:
+    Returns a BoxList object with N boxes.
+  """
+  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
+    win_height = window[2] - window[0]
+    win_width = window[3] - window[1]
+    boxlist_new = scale(box_list.BoxList(
+        boxlist.get() - [window[0], window[1], window[0], window[1]]),
+                        1.0 / win_height, 1.0 / win_width)
+    boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
+    return boxlist_new
+
+
+def sq_dist(boxlist1, boxlist2, scope=None):
+  """Computes the pairwise squared distances between box corners.
+
+  This op treats each box as if it were a point in a 4d Euclidean space and
+  computes pairwise squared distances.
+
+  Mathematically, we are given two matrices of box coordinates X and Y,
+  where X(i,:) is the i'th row of X, containing the 4 numbers defining the
+  corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
+  boxlist2.  We compute
+  Z(i,j) = ||X(i,:) - Y(j,:)||^2
+         = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise distances
+  """
+  with tf.name_scope(scope, 'SqDist'):
+    sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
+    sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
+    innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
+                          transpose_a=False, transpose_b=True)
+    return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
+
+
+def boolean_mask(boxlist, indicator, fields=None, scope=None,
+                 use_static_shapes=False, indicator_sum=None):
+  """Select boxes from BoxList according to indicator and return new BoxList.
+
+  `boolean_mask` returns the subset of boxes that are marked as "True" by the
+  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
+  the input index list, as well as all additional fields stored in the boxlist
+  (indexing into the first dimension).  However one can optionally only draw
+  from a subset of fields.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    indicator: a rank-1 boolean tensor
+    fields: (optional) list of fields to also gather from.  If None (default),
+      all fields are gathered from.  Pass an empty fields list to only gather
+      the box coordinates.
+    scope: name scope.
+    use_static_shapes: Whether to use an implementation with static shape
+      gurantees.
+    indicator_sum: An integer containing the sum of `indicator` vector. Only
+      required if `use_static_shape` is True.
+
+  Returns:
+    subboxlist: a BoxList corresponding to the subset of the input BoxList
+      specified by indicator
+  Raises:
+    ValueError: if `indicator` is not a rank-1 boolean tensor.
+  """
+  with tf.name_scope(scope, 'BooleanMask'):
+    if indicator.shape.ndims != 1:
+      raise ValueError('indicator should have rank 1')
+    if indicator.dtype != tf.bool:
+      raise ValueError('indicator should be a boolean tensor')
+    if use_static_shapes:
+      if not (indicator_sum and isinstance(indicator_sum, int)):
+        raise ValueError('`indicator_sum` must be a of type int')
+      selected_positions = tf.cast(indicator, dtype=tf.float32)
+      indexed_positions = tf.cast(
+          tf.multiply(
+              tf.cumsum(selected_positions), selected_positions),
+          dtype=tf.int32)
+      one_hot_selector = tf.one_hot(
+          indexed_positions - 1, indicator_sum, dtype=tf.float32)
+      sampled_indices = tf.cast(
+          tf.tensordot(
+              tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32),
+              one_hot_selector,
+              axes=[0, 0]),
+          dtype=tf.int32)
+      return gather(boxlist, sampled_indices, use_static_shapes=True)
+    else:
+      subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
+      if fields is None:
+        fields = boxlist.get_extra_fields()
+      for field in fields:
+        if not boxlist.has_field(field):
+          raise ValueError('boxlist must contain all specified fields')
+        subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
+        subboxlist.add_field(field, subfieldlist)
+      return subboxlist
+
+
+def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
+  """Gather boxes from BoxList according to indices and return new BoxList.
+
+  By default, `gather` returns boxes corresponding to the input index list, as
+  well as all additional fields stored in the boxlist (indexing into the
+  first dimension).  However one can optionally only gather from a
+  subset of fields.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    indices: a rank-1 tensor of type int32 / int64
+    fields: (optional) list of fields to also gather from.  If None (default),
+      all fields are gathered from.  Pass an empty fields list to only gather
+      the box coordinates.
+    scope: name scope.
+    use_static_shapes: Whether to use an implementation with static shape
+      gurantees.
+
+  Returns:
+    subboxlist: a BoxList corresponding to the subset of the input BoxList
+    specified by indices
+  Raises:
+    ValueError: if specified field is not contained in boxlist or if the
+      indices are not of type int32
+  """
+  with tf.name_scope(scope, 'Gather'):
+    if len(indices.shape.as_list()) != 1:
+      raise ValueError('indices should have rank 1')
+    if indices.dtype != tf.int32 and indices.dtype != tf.int64:
+      raise ValueError('indices should be an int32 / int64 tensor')
+    gather_op = tf.gather
+    if use_static_shapes:
+      gather_op = ops.matmul_gather_on_zeroth_axis
+    subboxlist = box_list.BoxList(gather_op(boxlist.get(), indices))
+    if fields is None:
+      fields = boxlist.get_extra_fields()
+    fields += ['boxes']
+    for field in fields:
+      if not boxlist.has_field(field):
+        raise ValueError('boxlist must contain all specified fields')
+      subfieldlist = gather_op(boxlist.get_field(field), indices)
+      subboxlist.add_field(field, subfieldlist)
+    return subboxlist
+
+
+def concatenate(boxlists, fields=None, scope=None):
+  """Concatenate list of BoxLists.
+
+  This op concatenates a list of input BoxLists into a larger BoxList.  It also
+  handles concatenation of BoxList fields as long as the field tensor shapes
+  are equal except for the first dimension.
+
+  Args:
+    boxlists: list of BoxList objects
+    fields: optional list of fields to also concatenate.  By default, all
+      fields from the first BoxList in the list are included in the
+      concatenation.
+    scope: name scope.
+
+  Returns:
+    a BoxList with number of boxes equal to
+      sum([boxlist.num_boxes() for boxlist in BoxList])
+  Raises:
+    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
+      contains non BoxList objects), or if requested fields are not contained in
+      all boxlists
+  """
+  with tf.name_scope(scope, 'Concatenate'):
+    if not isinstance(boxlists, list):
+      raise ValueError('boxlists should be a list')
+    if not boxlists:
+      raise ValueError('boxlists should have nonzero length')
+    for boxlist in boxlists:
+      if not isinstance(boxlist, box_list.BoxList):
+        raise ValueError('all elements of boxlists should be BoxList objects')
+    concatenated = box_list.BoxList(
+        tf.concat([boxlist.get() for boxlist in boxlists], 0))
+    if fields is None:
+      fields = boxlists[0].get_extra_fields()
+    for field in fields:
+      first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
+      first_field_shape[0] = -1
+      if None in first_field_shape:
+        raise ValueError('field %s must have fully defined shape except for the'
+                         ' 0th dimension.' % field)
+      for boxlist in boxlists:
+        if not boxlist.has_field(field):
+          raise ValueError('boxlist must contain all requested fields')
+        field_shape = boxlist.get_field(field).get_shape().as_list()
+        field_shape[0] = -1
+        if field_shape != first_field_shape:
+          raise ValueError('field %s must have same shape for all boxlists '
+                           'except for the 0th dimension.' % field)
+      concatenated_field = tf.concat(
+          [boxlist.get_field(field) for boxlist in boxlists], 0)
+      concatenated.add_field(field, concatenated_field)
+    return concatenated
+
+
+def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
+  """Sort boxes and associated fields according to a scalar field.
+
+  A common use case is reordering the boxes according to descending scores.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    field: A BoxList field for sorting and reordering the BoxList.
+    order: (Optional) descend or ascend. Default is descend.
+    scope: name scope.
+
+  Returns:
+    sorted_boxlist: A sorted BoxList with the field in the specified order.
+
+  Raises:
+    ValueError: if specified field does not exist
+    ValueError: if the order is not either descend or ascend
+  """
+  with tf.name_scope(scope, 'SortByField'):
+    if order != SortOrder.descend and order != SortOrder.ascend:
+      raise ValueError('Invalid sort order')
+
+    field_to_sort = boxlist.get_field(field)
+    if len(field_to_sort.shape.as_list()) != 1:
+      raise ValueError('Field should have rank 1')
+
+    num_boxes = boxlist.num_boxes()
+    num_entries = tf.size(field_to_sort)
+    length_assert = tf.Assert(
+        tf.equal(num_boxes, num_entries),
+        ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
+
+    with tf.control_dependencies([length_assert]):
+      _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
+
+    if order == SortOrder.ascend:
+      sorted_indices = tf.reverse_v2(sorted_indices, [0])
+
+    return gather(boxlist, sorted_indices)
+
+
+def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
+  """Overlay bounding box list on image.
+
+  Currently this visualization plots a 1 pixel thick red bounding box on top
+  of the image.  Note that tf.image.draw_bounding_boxes essentially is
+  1 indexed.
+
+  Args:
+    image: an image tensor with shape [height, width, 3]
+    boxlist: a BoxList
+    normalized: (boolean) specify whether corners are to be interpreted
+      as absolute coordinates in image space or normalized with respect to the
+      image size.
+    scope: name scope.
+
+  Returns:
+    image_and_boxes: an image tensor with shape [height, width, 3]
+  """
+  with tf.name_scope(scope, 'VisualizeBoxesInImage'):
+    if not normalized:
+      height, width, _ = tf.unstack(tf.shape(image))
+      boxlist = scale(boxlist,
+                      1.0 / tf.cast(height, tf.float32),
+                      1.0 / tf.cast(width, tf.float32))
+    corners = tf.expand_dims(boxlist.get(), 0)
+    image = tf.expand_dims(image, 0)
+    return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0])
+
+
+def filter_field_value_equals(boxlist, field, value, scope=None):
+  """Filter to keep only boxes with field entries equal to the given value.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    field: field name for filtering.
+    value: scalar value.
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= N
+
+  Raises:
+    ValueError: if boxlist not a BoxList object or if it does not have
+      the specified field.
+  """
+  with tf.name_scope(scope, 'FilterFieldValueEquals'):
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field(field):
+      raise ValueError('boxlist must contain the specified field')
+    filter_field = boxlist.get_field(field)
+    gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1])
+    return gather(boxlist, gather_index)
+
+
+def filter_greater_than(boxlist, thresh, scope=None):
+  """Filter to keep only boxes with score exceeding a given threshold.
+
+  This op keeps the collection of boxes whose corresponding scores are
+  greater than the input threshold.
+
+  TODO(jonathanhuang): Change function name to filter_scores_greater_than
+
+  Args:
+    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
+      representing detection scores.
+    thresh: scalar threshold
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= N
+
+  Raises:
+    ValueError: if boxlist not a BoxList object or if it does not
+      have a scores field
+  """
+  with tf.name_scope(scope, 'FilterGreaterThan'):
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field('scores'):
+      raise ValueError('input boxlist must have \'scores\' field')
+    scores = boxlist.get_field('scores')
+    if len(scores.shape.as_list()) > 2:
+      raise ValueError('Scores should have rank 1 or 2')
+    if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
+      raise ValueError('Scores should have rank 1 or have shape '
+                       'consistent with [None, 1]')
+    high_score_indices = tf.cast(tf.reshape(
+        tf.where(tf.greater(scores, thresh)),
+        [-1]), tf.int32)
+    return gather(boxlist, high_score_indices)
+
+
+def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
+  """Non maximum suppression.
+
+  This op greedily selects a subset of detection bounding boxes, pruning
+  away boxes that have high IOU (intersection over union) overlap (> thresh)
+  with already selected boxes.  Note that this only works for a single class ---
+  to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
+
+  Args:
+    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
+      representing detection scores.
+    thresh: scalar threshold
+    max_output_size: maximum number of retained boxes
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= max_output_size
+  Raises:
+    ValueError: if thresh is not in [0, 1]
+  """
+  with tf.name_scope(scope, 'NonMaxSuppression'):
+    if not 0 <= thresh <= 1.0:
+      raise ValueError('thresh must be between 0 and 1')
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field('scores'):
+      raise ValueError('input boxlist must have \'scores\' field')
+    selected_indices = tf.image.non_max_suppression(
+        boxlist.get(), boxlist.get_field('scores'),
+        max_output_size, iou_threshold=thresh)
+    return gather(boxlist, selected_indices)
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
+
+  Args:
+    boxlist_to_copy_to: BoxList to which extra fields are copied.
+    boxlist_to_copy_from: BoxList from which fields are copied.
+
+  Returns:
+    boxlist_to_copy_to with extra fields.
+  """
+  for field in boxlist_to_copy_from.get_extra_fields():
+    boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
+  return boxlist_to_copy_to
+
+
+def to_normalized_coordinates(boxlist, height, width,
+                              check_range=True, scope=None):
+  """Converts absolute box coordinates to normalized coordinates in [0, 1].
+
+  Usually one uses the dynamic shape of the image or conv-layer tensor:
+    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+                                                     tf.shape(images)[1],
+                                                     tf.shape(images)[2]),
+
+  This function raises an assertion failed error at graph execution time when
+  the maximum coordinate is smaller than 1.01 (which means that coordinates are
+  already normalized). The value 1.01 is to deal with small rounding errors.
+
+  Args:
+    boxlist: BoxList with coordinates in terms of pixel-locations.
+    height: Maximum value for height of absolute box coordinates.
+    width: Maximum value for width of absolute box coordinates.
+    check_range: If True, checks if the coordinates are normalized or not.
+    scope: name scope.
+
+  Returns:
+    boxlist with normalized coordinates in [0, 1].
+  """
+  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
+    height = tf.cast(height, tf.float32)
+    width = tf.cast(width, tf.float32)
+
+    if check_range:
+      max_val = tf.reduce_max(boxlist.get())
+      max_assert = tf.Assert(tf.greater(max_val, 1.01),
+                             ['max value is lower than 1.01: ', max_val])
+      with tf.control_dependencies([max_assert]):
+        width = tf.identity(width)
+
+    return scale(boxlist, 1 / height, 1 / width)
+
+
+def to_absolute_coordinates(boxlist,
+                            height,
+                            width,
+                            check_range=True,
+                            maximum_normalized_coordinate=1.1,
+                            scope=None):
+  """Converts normalized box coordinates to absolute pixel coordinates.
+
+  This function raises an assertion failed error when the maximum box coordinate
+  value is larger than maximum_normalized_coordinate (in which case coordinates
+  are already absolute).
+
+  Args:
+    boxlist: BoxList with coordinates in range [0, 1].
+    height: Maximum value for height of absolute box coordinates.
+    width: Maximum value for width of absolute box coordinates.
+    check_range: If True, checks if the coordinates are normalized or not.
+    maximum_normalized_coordinate: Maximum coordinate value to be considered
+      as normalized, default to 1.1.
+    scope: name scope.
+
+  Returns:
+    boxlist with absolute coordinates in terms of the image size.
+
+  """
+  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
+    height = tf.cast(height, tf.float32)
+    width = tf.cast(width, tf.float32)
+
+    # Ensure range of input boxes is correct.
+    if check_range:
+      box_maximum = tf.reduce_max(boxlist.get())
+      max_assert = tf.Assert(
+          tf.greater_equal(maximum_normalized_coordinate, box_maximum),
+          ['maximum box coordinate value is larger '
+           'than %f: ' % maximum_normalized_coordinate, box_maximum])
+      with tf.control_dependencies([max_assert]):
+        width = tf.identity(width)
+
+    return scale(boxlist, height, width)
+
+
+def refine_boxes_multi_class(pool_boxes,
+                             num_classes,
+                             nms_iou_thresh,
+                             nms_max_detections,
+                             voting_iou_thresh=0.5):
+  """Refines a pool of boxes using non max suppression and box voting.
+
+  Box refinement is done independently for each class.
+
+  Args:
+    pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
+      have a rank 1 'scores' field and a rank 1 'classes' field.
+    num_classes: (int scalar) Number of classes.
+    nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
+    nms_max_detections: (int scalar) maximum output size for NMS.
+    voting_iou_thresh: (float scalar) iou threshold for box voting.
+
+  Returns:
+    BoxList of refined boxes.
+
+  Raises:
+    ValueError: if
+      a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
+      b) pool_boxes is not a BoxList.
+      c) pool_boxes does not have a scores and classes field.
+  """
+  if not 0.0 <= nms_iou_thresh <= 1.0:
+    raise ValueError('nms_iou_thresh must be between 0 and 1')
+  if not 0.0 <= voting_iou_thresh <= 1.0:
+    raise ValueError('voting_iou_thresh must be between 0 and 1')
+  if not isinstance(pool_boxes, box_list.BoxList):
+    raise ValueError('pool_boxes must be a BoxList')
+  if not pool_boxes.has_field('scores'):
+    raise ValueError('pool_boxes must have a \'scores\' field')
+  if not pool_boxes.has_field('classes'):
+    raise ValueError('pool_boxes must have a \'classes\' field')
+
+  refined_boxes = []
+  for i in range(num_classes):
+    boxes_class = filter_field_value_equals(pool_boxes, 'classes', i)
+    refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh,
+                                       nms_max_detections, voting_iou_thresh)
+    refined_boxes.append(refined_boxes_class)
+  return sort_by_field(concatenate(refined_boxes), 'scores')
+
+
+def refine_boxes(pool_boxes,
+                 nms_iou_thresh,
+                 nms_max_detections,
+                 voting_iou_thresh=0.5):
+  """Refines a pool of boxes using non max suppression and box voting.
+
+  Args:
+    pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
+      have a rank 1 'scores' field.
+    nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
+    nms_max_detections: (int scalar) maximum output size for NMS.
+    voting_iou_thresh: (float scalar) iou threshold for box voting.
+
+  Returns:
+    BoxList of refined boxes.
+
+  Raises:
+    ValueError: if
+      a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
+      b) pool_boxes is not a BoxList.
+      c) pool_boxes does not have a scores field.
+  """
+  if not 0.0 <= nms_iou_thresh <= 1.0:
+    raise ValueError('nms_iou_thresh must be between 0 and 1')
+  if not 0.0 <= voting_iou_thresh <= 1.0:
+    raise ValueError('voting_iou_thresh must be between 0 and 1')
+  if not isinstance(pool_boxes, box_list.BoxList):
+    raise ValueError('pool_boxes must be a BoxList')
+  if not pool_boxes.has_field('scores'):
+    raise ValueError('pool_boxes must have a \'scores\' field')
+
+  nms_boxes = non_max_suppression(
+      pool_boxes, nms_iou_thresh, nms_max_detections)
+  return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
+
+
+def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
+  """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
+
+  Performs box voting as described in 'Object detection via a multi-region &
+  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
+  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
+  with iou overlap >= iou_thresh. The location of B is set to the weighted
+  average location of boxes in S (scores are used for weighting). And the score
+  of B is set to the average score of boxes in S.
+
+  Args:
+    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
+      boxes are usually selected from pool_boxes using non max suppression.
+    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
+    iou_thresh: (float scalar) iou threshold for matching boxes in
+      selected_boxes and pool_boxes.
+
+  Returns:
+    BoxList containing averaged locations and scores for each box in
+    selected_boxes.
+
+  Raises:
+    ValueError: if
+      a) selected_boxes or pool_boxes is not a BoxList.
+      b) if iou_thresh is not in [0, 1].
+      c) pool_boxes does not have a scores field.
+  """
+  if not 0.0 <= iou_thresh <= 1.0:
+    raise ValueError('iou_thresh must be between 0 and 1')
+  if not isinstance(selected_boxes, box_list.BoxList):
+    raise ValueError('selected_boxes must be a BoxList')
+  if not isinstance(pool_boxes, box_list.BoxList):
+    raise ValueError('pool_boxes must be a BoxList')
+  if not pool_boxes.has_field('scores'):
+    raise ValueError('pool_boxes must have a \'scores\' field')
+
+  iou_ = iou(selected_boxes, pool_boxes)
+  match_indicator = tf.cast(tf.greater(iou_, iou_thresh), dtype=tf.float32)
+  num_matches = tf.reduce_sum(match_indicator, 1)
+  # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
+  # match to any boxes in pool_boxes. For such boxes without any matches, we
+  # should return the original boxes without voting.
+  match_assert = tf.Assert(
+      tf.reduce_all(tf.greater(num_matches, 0)),
+      ['Each box in selected_boxes must match with at least one box '
+       'in pool_boxes.'])
+
+  scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
+  scores_assert = tf.Assert(
+      tf.reduce_all(tf.greater_equal(scores, 0)),
+      ['Scores must be non negative.'])
+
+  with tf.control_dependencies([scores_assert, match_assert]):
+    sum_scores = tf.matmul(match_indicator, scores)
+  averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
+
+  box_locations = tf.matmul(match_indicator,
+                            pool_boxes.get() * scores) / sum_scores
+  averaged_boxes = box_list.BoxList(box_locations)
+  _copy_extra_fields(averaged_boxes, selected_boxes)
+  averaged_boxes.add_field('scores', averaged_scores)
+  return averaged_boxes
+
+
+def get_minimal_coverage_box(boxlist,
+                             default_box=None,
+                             scope=None):
+  """Creates a single bounding box which covers all boxes in the boxlist.
+
+  Args:
+    boxlist: A Boxlist.
+    default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
+      this default box will be returned. If None, will use a default box of
+      [[0., 0., 1., 1.]].
+    scope: Name scope.
+
+  Returns:
+    A [1, 4] float32 tensor with a bounding box that tightly covers all the
+    boxes in the box list. If the boxlist does not contain any boxes, the
+    default box is returned.
+  """
+  with tf.name_scope(scope, 'CreateCoverageBox'):
+    num_boxes = boxlist.num_boxes()
+
+    def coverage_box(bboxes):
+      y_min, x_min, y_max, x_max = tf.split(
+          value=bboxes, num_or_size_splits=4, axis=1)
+      y_min_coverage = tf.reduce_min(y_min, axis=0)
+      x_min_coverage = tf.reduce_min(x_min, axis=0)
+      y_max_coverage = tf.reduce_max(y_max, axis=0)
+      x_max_coverage = tf.reduce_max(x_max, axis=0)
+      return tf.stack(
+          [y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage],
+          axis=1)
+
+    default_box = default_box or tf.constant([[0., 0., 1., 1.]])
+    return tf.cond(
+        tf.greater_equal(num_boxes, 1),
+        true_fn=lambda: coverage_box(boxlist.get()),
+        false_fn=lambda: default_box)
+
+
+def sample_boxes_by_jittering(boxlist,
+                              num_boxes_to_sample,
+                              stddev=0.1,
+                              scope=None):
+  """Samples num_boxes_to_sample boxes by jittering around boxlist boxes.
+
+  It is possible that this function might generate boxes with size 0. The larger
+  the stddev, this is more probable. For a small stddev of 0.1 this probability
+  is very small.
+
+  Args:
+    boxlist: A boxlist containing N boxes in normalized coordinates.
+    num_boxes_to_sample: A positive integer containing the number of boxes to
+      sample.
+    stddev: Standard deviation. This is used to draw random offsets for the
+      box corners from a normal distribution. The offset is multiplied by the
+      box size so will be larger in terms of pixels for larger boxes.
+    scope: Name scope.
+
+  Returns:
+    sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
+      normalized coordinates.
+  """
+  with tf.name_scope(scope, 'SampleBoxesByJittering'):
+    num_boxes = boxlist.num_boxes()
+    box_indices = tf.random_uniform(
+        [num_boxes_to_sample],
+        minval=0,
+        maxval=num_boxes,
+        dtype=tf.int32)
+    sampled_boxes = tf.gather(boxlist.get(), box_indices)
+    sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
+    sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
+    rand_miny_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+    rand_minx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+    rand_maxy_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+    rand_maxx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+    miny = rand_miny_gaussian * sampled_boxes_height + sampled_boxes[:, 0]
+    minx = rand_minx_gaussian * sampled_boxes_width + sampled_boxes[:, 1]
+    maxy = rand_maxy_gaussian * sampled_boxes_height + sampled_boxes[:, 2]
+    maxx = rand_maxx_gaussian * sampled_boxes_width + sampled_boxes[:, 3]
+    maxy = tf.maximum(miny, maxy)
+    maxx = tf.maximum(minx, maxx)
+    sampled_boxes = tf.stack([miny, minx, maxy, maxx], axis=1)
+    sampled_boxes = tf.maximum(tf.minimum(sampled_boxes, 1.0), 0.0)
+    return box_list.BoxList(sampled_boxes)
--- a/official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py
+++ b/official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Faster RCNN box coder.
+
+Faster RCNN box coder follows the coding schema described below:
+  ty = (y - ya) / ha
+  tx = (x - xa) / wa
+  th = log(h / ha)
+  tw = log(w / wa)
+  where x, y, w, h denote the box's center coordinates, width and height
+  respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+  coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+  center, width and height respectively.
+
+  See http://arxiv.org/abs/1506.01497 for details.
+"""
+
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import box_coder
+from official.vision.detection.utils.object_detection import box_list
+
+EPSILON = 1e-8
+
+
+class FasterRcnnBoxCoder(box_coder.BoxCoder):
+  """Faster RCNN box coder."""
+
+  def __init__(self, scale_factors=None):
+    """Constructor for FasterRcnnBoxCoder.
+
+    Args:
+      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+        If set to None, does not perform scaling. For Faster RCNN,
+        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
+    """
+    if scale_factors:
+      assert len(scale_factors) == 4
+      for scalar in scale_factors:
+        assert scalar > 0
+    self._scale_factors = scale_factors
+
+  @property
+  def code_size(self):
+    return 4
+
+  def _encode(self, boxes, anchors):
+    """Encode a box collection with respect to anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded.
+      anchors: BoxList of anchors.
+
+    Returns:
+      a tensor representing N anchor-encoded boxes of the format
+      [ty, tx, th, tw].
+    """
+    # Convert anchors to the center coordinate representation.
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+    # Avoid NaN in division and log below.
+    ha += EPSILON
+    wa += EPSILON
+    h += EPSILON
+    w += EPSILON
+
+    tx = (xcenter - xcenter_a) / wa
+    ty = (ycenter - ycenter_a) / ha
+    tw = tf.math.log(w / wa)
+    th = tf.math.log(h / ha)
+    # Scales location targets as used in paper for joint training.
+    if self._scale_factors:
+      ty *= self._scale_factors[0]
+      tx *= self._scale_factors[1]
+      th *= self._scale_factors[2]
+      tw *= self._scale_factors[3]
+    return tf.transpose(a=tf.stack([ty, tx, th, tw]))
+
+  def _decode(self, rel_codes, anchors):
+    """Decode relative codes to boxes.
+
+    Args:
+      rel_codes: a tensor representing N anchor-encoded boxes.
+      anchors: BoxList of anchors.
+
+    Returns:
+      boxes: BoxList holding N bounding boxes.
+    """
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+    ty, tx, th, tw = tf.unstack(tf.transpose(a=rel_codes))
+    if self._scale_factors:
+      ty /= self._scale_factors[0]
+      tx /= self._scale_factors[1]
+      th /= self._scale_factors[2]
+      tw /= self._scale_factors[3]
+    w = tf.exp(tw) * wa
+    h = tf.exp(th) * ha
+    ycenter = ty * ha + ycenter_a
+    xcenter = tx * wa + xcenter_a
+    ymin = ycenter - h / 2.
+    xmin = xcenter - w / 2.
+    ymax = ycenter + h / 2.
+    xmax = xcenter + w / 2.
+    return box_list.BoxList(tf.transpose(a=tf.stack([ymin, xmin, ymax, xmax])))
--- a/official/vision/detection/utils/object_detection/matcher.py
+++ b/official/vision/detection/utils/object_detection/matcher.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+This module defines the Matcher interface and the Match object. The job of the
+matcher is to match row and column indices based on the similarity matrix and
+other optional parameters. Each column is matched to at most one row. There
+are three possibilities for the matching:
+
+1) match: A column matches a row.
+2) no_match: A column does not match any row.
+3) ignore: A column that is neither 'match' nor no_match.
+
+The ignore case is regularly encountered in object detection: when an anchor has
+a relatively small overlap with a ground-truth box, one neither wants to
+consider this box a positive example (match) nor a negative example (no match).
+
+The Match class is used to store the match results and it provides simple apis
+to query the results.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow.compat.v2 as tf
+
+
+class Match(object):
+  """Class to store results from the matcher.
+
+  This class is used to store the results from the matcher. It provides
+  convenient methods to query the matching results.
+  """
+
+  def __init__(self, match_results):
+    """Constructs a Match object.
+
+    Args:
+      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+
+    Raises:
+      ValueError: if match_results does not have rank 1 or is not an
+        integer int32 scalar tensor
+    """
+    if match_results.shape.ndims != 1:
+      raise ValueError('match_results should have rank 1')
+    if match_results.dtype != tf.int32:
+      raise ValueError('match_results should be an int32 or int64 scalar '
+                       'tensor')
+    self._match_results = match_results
+
+  @property
+  def match_results(self):
+    """The accessor for match results.
+
+    Returns:
+      the tensor which encodes the match results.
+    """
+    return self._match_results
+
+  def matched_column_indices(self):
+    """Returns column indices that match to some row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
+
+  def matched_column_indicator(self):
+    """Returns column indices that are matched.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return tf.greater_equal(self._match_results, 0)
+
+  def num_matched_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(input=self.matched_column_indices())
+
+  def unmatched_column_indices(self):
+    """Returns column indices that do not match any row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
+
+  def unmatched_column_indicator(self):
+    """Returns column indices that are unmatched.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return tf.equal(self._match_results, -1)
+
+  def num_unmatched_columns(self):
+    """Returns number (int32 scalar tensor) of unmatched columns."""
+    return tf.size(input=self.unmatched_column_indices())
+
+  def ignored_column_indices(self):
+    """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+  def ignored_column_indicator(self):
+    """Returns boolean column indicator where True means the colum is ignored.
+
+    Returns:
+      column_indicator: boolean vector which is True for all ignored column
+      indices.
+    """
+    return tf.equal(self._match_results, -2)
+
+  def num_ignored_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(input=self.ignored_column_indices())
+
+  def unmatched_or_ignored_column_indices(self):
+    """Returns column indices that are unmatched or ignored.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+  def matched_row_indices(self):
+    """Returns row indices that match some column.
+
+    The indices returned by this op are ordered so as to be in correspondence
+    with the output of matched_column_indicator().  For example if
+    self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
+    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+    matched to row 3.
+
+    Returns:
+      row_indices: int32 tensor of shape [K] with row indices.
+    """
+    return self._reshape_and_cast(
+        tf.gather(self._match_results, self.matched_column_indices()))
+
+  def _reshape_and_cast(self, t):
+    return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+  def gather_based_on_match(self, input_tensor, unmatched_value,
+                            ignored_value):
+    """Gathers elements from `input_tensor` based on match results.
+
+    For columns that are matched to a row, gathered_tensor[col] is set to
+    input_tensor[match_results[col]]. For columns that are unmatched,
+    gathered_tensor[col] is set to unmatched_value. Finally, for columns that
+    are ignored gathered_tensor[col] is set to ignored_value.
+
+    Note that the input_tensor.shape[1:] must match with unmatched_value.shape
+    and ignored_value.shape
+
+    Args:
+      input_tensor: Tensor to gather values from.
+      unmatched_value: Constant tensor value for unmatched columns.
+      ignored_value: Constant tensor value for ignored columns.
+
+    Returns:
+      gathered_tensor: A tensor containing values gathered from input_tensor.
+        The shape of the gathered tensor is [match_results.shape[0]] +
+        input_tensor.shape[1:].
+    """
+    input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
+                              input_tensor], axis=0)
+    gather_indices = tf.maximum(self.match_results + 2, 0)
+    gathered_tensor = tf.gather(input_tensor, gather_indices)
+    return gathered_tensor
+
+
+class Matcher(object):
+  """Abstract base class for matcher.
+  """
+  __metaclass__ = ABCMeta
+
+  def match(self, similarity_matrix, scope=None, **params):
+    """Computes matches among row and column indices and returns the result.
+
+    Computes matches among the row and column indices based on the similarity
+    matrix and optional arguments.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      scope: Op scope name. Defaults to 'Match' if None.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      A Match object with the results of matching.
+    """
+    if not scope:
+      scope = 'Match'
+    with tf.name_scope(scope) as scope:
+      return Match(self._match(similarity_matrix, **params))
+
+  @abstractmethod
+  def _match(self, similarity_matrix, **params):
+    """Method to be overridden by implementations.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
+        that column i is matched to row match_results[i], match_results[i]=-1
+        means that the column is not matched. match_results[i]=-2 means that
+        the column is ignored (usually this happens when there is a very weak
+        match which one neither wants as positive nor negative example).
+    """
+    pass
--- a/official/vision/detection/utils/object_detection/minibatch_sampler.py
+++ b/official/vision/detection/utils/object_detection/minibatch_sampler.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base minibatch sampler module.
+
+The job of the minibatch_sampler is to subsample a minibatch based on some
+criterion.
+
+The main function call is:
+    subsample(indicator, batch_size, **params).
+Indicator is a 1d boolean tensor where True denotes which examples can be
+sampled. It returns a boolean indicator where True denotes an example has been
+sampled..
+
+Subclasses should implement the Subsample function and can make use of the
+@staticmethod SubsampleIndicator.
+
+This is originally implemented in TensorFlow Object Detection API.
+"""
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import ops
+
+
+class MinibatchSampler(object):
+  """Abstract base class for subsampling minibatches."""
+  __metaclass__ = ABCMeta
+
+  def __init__(self):
+    """Constructs a minibatch sampler."""
+    pass
+
+  @abstractmethod
+  def subsample(self, indicator, batch_size, **params):
+    """Returns subsample of entries in indicator.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size.
+      **params: additional keyword arguments for specific implementations of
+          the MinibatchSampler.
+
+    Returns:
+      sample_indicator: boolean tensor of shape [N] whose True entries have been
+      sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size
+    """
+    pass
+
+  @staticmethod
+  def subsample_indicator(indicator, num_samples):
+    """Subsample indicator vector.
+
+    Given a boolean indicator vector with M elements set to `True`, the function
+    assigns all but `num_samples` of these previously `True` elements to
+    `False`. If `num_samples` is greater than M, the original indicator vector
+    is returned.
+
+    Args:
+      indicator: a 1-dimensional boolean tensor indicating which elements
+        are allowed to be sampled and which are not.
+      num_samples: int32 scalar tensor
+
+    Returns:
+      a boolean tensor with the same shape as input (indicator) tensor
+    """
+    indices = tf.where(indicator)
+    indices = tf.random.shuffle(indices)
+    indices = tf.reshape(indices, [-1])
+
+    num_samples = tf.minimum(tf.size(input=indices), num_samples)
+    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
+
+    selected_indicator = ops.indices_to_dense_vector(
+        selected_indices,
+        tf.shape(input=indicator)[0])
+
+    return tf.equal(selected_indicator, 1)
--- a/official/vision/detection/utils/object_detection/ops.py
+++ b/official/vision/detection/utils/object_detection/ops.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A module for helper tensorflow ops.
+
+This is originally implemented in TensorFlow Object Detection API.
+"""
+
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import shape_utils
+
+
+def indices_to_dense_vector(indices,
+                            size,
+                            indices_value=1.,
+                            default_value=0,
+                            dtype=tf.float32):
+  """Creates dense vector with indices set to specific value and rest to zeros.
+
+  This function exists because it is unclear if it is safe to use
+    tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
+  with indices which are not ordered.
+  This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
+
+  Args:
+    indices: 1d Tensor with integer indices which are to be set to
+        indices_values.
+    size: scalar with size (integer) of output Tensor.
+    indices_value: values of elements specified by indices in the output vector
+    default_value: values of other elements in the output vector.
+    dtype: data type.
+
+  Returns:
+    dense 1D Tensor of shape [size] with indices set to indices_values and the
+        rest set to default_value.
+  """
+  size = tf.cast(size, dtype=tf.int32)
+  zeros = tf.ones([size], dtype=dtype) * default_value
+  values = tf.ones_like(indices, dtype=dtype) * indices_value
+
+  return tf.dynamic_stitch(
+      [tf.range(size), tf.cast(indices, dtype=tf.int32)], [zeros, values])
+
+
+def matmul_gather_on_zeroth_axis(params, indices, scope=None):
+  """Matrix multiplication based implementation of tf.gather on zeroth axis.
+
+  TODO(rathodv, jonathanhuang): enable sparse matmul option.
+
+  Args:
+    params: A float32 Tensor. The tensor from which to gather values.
+      Must be at least rank 1.
+    indices: A Tensor. Must be one of the following types: int32, int64.
+      Must be in range [0, params.shape[0])
+    scope: A name for the operation (optional).
+
+  Returns:
+    A Tensor. Has the same type as params. Values from params gathered
+    from indices given by indices, with shape indices.shape + params.shape[1:].
+  """
+  with tf.name_scope(scope, 'MatMulGather'):
+    params_shape = shape_utils.combined_static_and_dynamic_shape(params)
+    indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
+    params2d = tf.reshape(params, [params_shape[0], -1])
+    indicator_matrix = tf.one_hot(indices, params_shape[0])
+    gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
+    return tf.reshape(gathered_result_flattened,
+                      tf.stack(indices_shape + params_shape[1:]))
--- a/official/vision/detection/utils/object_detection/preprocessor.py
+++ b/official/vision/detection/utils/object_detection/preprocessor.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+    data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+                   randomly changing the brightness, contrast, hue and
+                   randomly jittering the bounding boxes.
+
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates meaning
+their coordinate values range in [0, 1]
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
+"""
+
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import box_list
+
+
+def _flip_boxes_left_right(boxes):
+  """Left-right flip the boxes.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  flipped_xmin = tf.subtract(1.0, xmax)
+  flipped_xmax = tf.subtract(1.0, xmin)
+  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+  return flipped_boxes
+
+
+def _flip_masks_left_right(masks):
+  """Left-right flip masks.
+
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+
+  Returns:
+    flipped masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  """
+  return masks[:, :, ::-1]
+
+
+def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
+                             scope=None):
+  """Flips the keypoints horizontally around the flip_point.
+
+  This operation flips the x coordinate for each keypoint around the flip_point
+  and also permutes the keypoints in a manner specified by flip_permutation.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    flip_point:  (float) scalar tensor representing the x coordinate to flip the
+      keypoints around.
+    flip_permutation: rank 1 int32 tensor containing the keypoint flip
+      permutation. This specifies the mapping from original keypoint indices
+      to the flipped keypoint indices. This is used primarily for keypoints
+      that are not reflection invariant. E.g. Suppose there are 3 keypoints
+      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+      and 'right_eye' after a horizontal flip.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'FlipHorizontal'):
+    keypoints = tf.transpose(a=keypoints, perm=[1, 0, 2])
+    keypoints = tf.gather(keypoints, flip_permutation)
+    v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    u = flip_point * 2.0 - u
+    new_keypoints = tf.concat([v, u], 2)
+    new_keypoints = tf.transpose(a=new_keypoints, perm=[1, 0, 2])
+    return new_keypoints
+
+
+def random_horizontal_flip(image,
+                           boxes=None,
+                           masks=None,
+                           keypoints=None,
+                           keypoint_flip_permutation=None,
+                           seed=None):
+  """Randomly flips the image and detections horizontally.
+
+  The probability of flipping the image is 50%.
+
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
+                               permutation.
+    seed: random seed
+
+  Returns:
+    image: image which is the same shape as input image.
+
+    If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
+    the function also returns the following tensors.
+
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+  """
+
+  def _flip_image(image):
+    # flip image
+    image_flipped = tf.image.flip_left_right(image)
+    return image_flipped
+
+  if keypoints is not None and keypoint_flip_permutation is None:
+    raise ValueError(
+        'keypoints are provided but keypoints_flip_permutation is not provided')
+
+  with tf.name_scope('RandomHorizontalFlip'):
+    result = []
+    # random variable defining whether to do flip or not
+    do_a_flip_random = tf.greater(tf.random.uniform([], seed=seed), 0.5)
+
+    # flip image
+    image = tf.cond(
+        pred=do_a_flip_random,
+        true_fn=lambda: _flip_image(image),
+        false_fn=lambda: image)
+    result.append(image)
+
+    # flip boxes
+    if boxes is not None:
+      boxes = tf.cond(
+          pred=do_a_flip_random,
+          true_fn=lambda: _flip_boxes_left_right(boxes),
+          false_fn=lambda: boxes)
+      result.append(boxes)
+
+    # flip masks
+    if masks is not None:
+      masks = tf.cond(
+          pred=do_a_flip_random,
+          true_fn=lambda: _flip_masks_left_right(masks),
+          false_fn=lambda: masks)
+      result.append(masks)
+
+    # flip keypoints
+    if keypoints is not None and keypoint_flip_permutation is not None:
+      permutation = keypoint_flip_permutation
+      keypoints = tf.cond(
+          pred=do_a_flip_random,
+          true_fn=lambda: keypoint_flip_horizontal(keypoints, 0.5, permutation),
+          false_fn=lambda: keypoints)
+      result.append(keypoints)
+
+    return tuple(result)
+
+
+def _compute_new_static_size(image, min_dimension, max_dimension):
+  """Compute new static shape for resize_to_range method."""
+  image_shape = image.get_shape().as_list()
+  orig_height = image_shape[0]
+  orig_width = image_shape[1]
+  num_channels = image_shape[2]
+  orig_min_dim = min(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  large_scale_factor = min_dimension / float(orig_min_dim)
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = int(round(orig_height * large_scale_factor))
+  large_width = int(round(orig_width * large_scale_factor))
+  large_size = [large_height, large_width]
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = max(orig_height, orig_width)
+    small_scale_factor = max_dimension / float(orig_max_dim)
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = int(round(orig_height * small_scale_factor))
+    small_width = int(round(orig_width * small_scale_factor))
+    small_size = [small_height, small_width]
+    new_size = large_size
+    if max(large_size) > max_dimension:
+      new_size = small_size
+  else:
+    new_size = large_size
+  return tf.constant(new_size + [num_channels])
+
+
+def _compute_new_dynamic_size(image, min_dimension, max_dimension):
+  """Compute new dynamic shape for resize_to_range method."""
+  image_shape = tf.shape(input=image)
+  orig_height = tf.cast(image_shape[0], dtype=tf.float32)
+  orig_width = tf.cast(image_shape[1], dtype=tf.float32)
+  num_channels = image_shape[2]
+  orig_min_dim = tf.minimum(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  min_dimension = tf.constant(min_dimension, dtype=tf.float32)
+  large_scale_factor = min_dimension / orig_min_dim
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = tf.cast(
+      tf.round(orig_height * large_scale_factor), dtype=tf.int32)
+  large_width = tf.cast(
+      tf.round(orig_width * large_scale_factor), dtype=tf.int32)
+  large_size = tf.stack([large_height, large_width])
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = tf.maximum(orig_height, orig_width)
+    max_dimension = tf.constant(max_dimension, dtype=tf.float32)
+    small_scale_factor = max_dimension / orig_max_dim
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = tf.cast(
+        tf.round(orig_height * small_scale_factor), dtype=tf.int32)
+    small_width = tf.cast(
+        tf.round(orig_width * small_scale_factor), dtype=tf.int32)
+    small_size = tf.stack([small_height, small_width])
+    new_size = tf.cond(
+        pred=tf.cast(tf.reduce_max(input_tensor=large_size), dtype=tf.float32) >
+        max_dimension,
+        true_fn=lambda: small_size,
+        false_fn=lambda: large_size)
+  else:
+    new_size = large_size
+  return tf.stack(tf.unstack(new_size) + [num_channels])
+
+
+def resize_to_range(image,
+                    masks=None,
+                    min_dimension=None,
+                    max_dimension=None,
+                    method=tf.image.ResizeMethod.BILINEAR,
+                    align_corners=False,
+                    pad_to_max_dimension=False):
+  """Resizes an image so its dimensions are within the provided value.
+
+  The output size can be described by two cases:
+  1. If the image can be rescaled so its minimum dimension is equal to the
+     provided value without the other dimension exceeding max_dimension,
+     then do so.
+  2. Otherwise, resize so the largest dimension is equal to max_dimension.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks.
+    min_dimension: (optional) (scalar) desired size of the smaller image
+                   dimension.
+    max_dimension: (optional) (scalar) maximum allowed size
+                   of the larger image dimension.
+    method: (optional) interpolation method used in resizing. Defaults to
+            BILINEAR.
+    align_corners: bool. If true, exactly align all 4 corners of the input
+                   and output. Defaults to False.
+    pad_to_max_dimension: Whether to resize the image and pad it with zeros
+      so the resulting image is of the spatial size
+      [max_dimension, max_dimension]. If masks are included they are padded
+      similarly.
+
+  Returns:
+    Note that the position of the resized_image_shape changes based on whether
+    masks are present.
+    resized_image: A 3D tensor of shape [new_height, new_width, channels],
+      where the image has been resized (with bilinear interpolation) so that
+      min(new_height, new_width) == min_dimension or
+      max(new_height, new_width) == max_dimension.
+    resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+      shape [num_instances, new_height, new_width].
+    resized_image_shape: A 1D tensor of shape [3] containing shape of the
+      resized image.
+
+  Raises:
+    ValueError: if the image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
+    if image.get_shape().is_fully_defined():
+      new_size = _compute_new_static_size(image, min_dimension, max_dimension)
+    else:
+      new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
+    new_image = tf.image.resize(image, new_size[:-1], method=method)
+
+    if pad_to_max_dimension:
+      new_image = tf.image.pad_to_bounding_box(
+          new_image, 0, 0, max_dimension, max_dimension)
+
+    result = [new_image]
+    if masks is not None:
+      new_masks = tf.expand_dims(masks, 3)
+      new_masks = tf.image.resize(
+          new_masks,
+          new_size[:-1],
+          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
+      new_masks = tf.squeeze(new_masks, 3)
+      if pad_to_max_dimension:
+        new_masks = tf.image.pad_to_bounding_box(
+            new_masks, 0, 0, max_dimension, max_dimension)
+      result.append(new_masks)
+
+    result.append(new_size)
+    return result
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
+
+  Args:
+    boxlist_to_copy_to: BoxList to which extra fields are copied.
+    boxlist_to_copy_from: BoxList from which fields are copied.
+
+  Returns:
+    boxlist_to_copy_to with extra fields.
+  """
+  for field in boxlist_to_copy_from.get_extra_fields():
+    boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
+  return boxlist_to_copy_to
+
+
+def box_list_scale(boxlist, y_scale, x_scale, scope=None):
+  """scale box coordinates in x and y dimensions.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    boxlist: BoxList holding N boxes
+  """
+  with tf.name_scope(scope, 'Scale'):
+    y_scale = tf.cast(y_scale, tf.float32)
+    x_scale = tf.cast(x_scale, tf.float32)
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    y_min = y_scale * y_min
+    y_max = y_scale * y_max
+    x_min = x_scale * x_min
+    x_max = x_scale * x_max
+    scaled_boxlist = box_list.BoxList(
+        tf.concat([y_min, x_min, y_max, x_max], 1))
+    return _copy_extra_fields(scaled_boxlist, boxlist)
+
+
+def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
+  """Scales keypoint coordinates in x and y dimensions.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'Scale'):
+    y_scale = tf.cast(y_scale, tf.float32)
+    x_scale = tf.cast(x_scale, tf.float32)
+    new_keypoints = keypoints * [[[y_scale, x_scale]]]
+    return new_keypoints
+
+
+def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
+  """Scales boxes from normalized to pixel coordinates.
+
+  Args:
+    image: A 3D float32 tensor of shape [height, width, channels].
+    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
+      boxes in normalized coordinates. Each row is of the form
+      [ymin, xmin, ymax, xmax].
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+
+  Returns:
+    image: unchanged input image.
+    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
+      bounding boxes in pixel coordinates.
+    scaled_keypoints: a 3D float32 tensor with shape
+      [num_instances, num_keypoints, 2] containing the keypoints in pixel
+      coordinates.
+  """
+  boxlist = box_list.BoxList(boxes)
+  image_height = tf.shape(input=image)[0]
+  image_width = tf.shape(input=image)[1]
+  scaled_boxes = box_list_scale(boxlist, image_height, image_width).get()
+  result = [image, scaled_boxes]
+  if keypoints is not None:
+    scaled_keypoints = keypoint_scale(keypoints, image_height, image_width)
+    result.append(scaled_keypoints)
+  return tuple(result)
--- a/official/vision/detection/utils/object_detection/region_similarity_calculator.py
+++ b/official/vision/detection/utils/object_detection/region_similarity_calculator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Region Similarity Calculators for BoxLists.
+
+Region Similarity Calculators compare a pairwise measure of similarity
+between the boxes in two BoxLists.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow.compat.v2 as tf
+
+
+def area(boxlist, scope=None):
+  """Computes area of boxes.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] representing box areas.
+  """
+  if not scope:
+    scope = 'Area'
+  with tf.name_scope(scope):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
+
+
+def intersection(boxlist1, boxlist2, scope=None):
+  """Compute pairwise intersection areas between boxes.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise intersections
+  """
+  if not scope:
+    scope = 'Intersection'
+  with tf.name_scope(scope):
+    y_min1, x_min1, y_max1, x_max1 = tf.split(
+        value=boxlist1.get(), num_or_size_splits=4, axis=1)
+    y_min2, x_min2, y_max2, x_max2 = tf.split(
+        value=boxlist2.get(), num_or_size_splits=4, axis=1)
+    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(a=y_max2))
+    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(a=y_min2))
+    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
+    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(a=x_max2))
+    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(a=x_min2))
+    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
+    return intersect_heights * intersect_widths
+
+
+def iou(boxlist1, boxlist2, scope=None):
+  """Computes pairwise intersection-over-union between box collections.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise iou scores.
+  """
+  if not scope:
+    scope = 'IOU'
+  with tf.name_scope(scope):
+    intersections = intersection(boxlist1, boxlist2)
+    areas1 = area(boxlist1)
+    areas2 = area(boxlist2)
+    unions = (
+        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
+    return tf.where(
+        tf.equal(intersections, 0.0), tf.zeros_like(intersections),
+        tf.truediv(intersections, unions))
+
+
+class RegionSimilarityCalculator(object):
+  """Abstract base class for region similarity calculator."""
+  __metaclass__ = ABCMeta
+
+  def compare(self, boxlist1, boxlist2, scope=None):
+    """Computes matrix of pairwise similarity between BoxLists.
+
+    This op (to be overriden) computes a measure of pairwise similarity between
+    the boxes in the given BoxLists. Higher values indicate more similarity.
+
+    Note that this method simply measures similarity and does not explicitly
+    perform a matching.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+      scope: Op scope name. Defaults to 'Compare' if None.
+
+    Returns:
+      a (float32) tensor of shape [N, M] with pairwise similarity score.
+    """
+    if not scope:
+      scope = 'Compare'
+    with tf.name_scope(scope) as scope:
+      return self._compare(boxlist1, boxlist2)
+
+  @abstractmethod
+  def _compare(self, boxlist1, boxlist2):
+    pass
+
+
+class IouSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on Intersection over Union (IOU) metric.
+
+  This class computes pairwise similarity between two BoxLists based on IOU.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOU similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise iou scores.
+    """
+    return iou(boxlist1, boxlist2)
--- a/official/vision/detection/utils/object_detection/shape_utils.py
+++ b/official/vision/detection/utils/object_detection/shape_utils.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utils used to manipulate tensor shapes."""
+
+import tensorflow.compat.v2 as tf
+
+
+def assert_shape_equal(shape_a, shape_b):
+  """Asserts that shape_a and shape_b are equal.
+
+  If the shapes are static, raises a ValueError when the shapes
+  mismatch.
+
+  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
+  mismatch.
+
+  Args:
+    shape_a: a list containing shape of the first tensor.
+    shape_b: a list containing shape of the second tensor.
+
+  Returns:
+    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
+    when the shapes are dynamic.
+
+  Raises:
+    ValueError: When shapes are both static and unequal.
+  """
+  if (all(isinstance(dim, int) for dim in shape_a) and
+      all(isinstance(dim, int) for dim in shape_b)):
+    if shape_a != shape_b:
+      raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
+    else: return tf.no_op()
+  else:
+    return tf.assert_equal(shape_a, shape_b)
+
+
+def combined_static_and_dynamic_shape(tensor):
+  """Returns a list containing static and dynamic values for the dimensions.
+
+  Returns a list of static and dynamic values for shape dimensions. This is
+  useful to preserve static shapes when available in reshape operation.
+
+  Args:
+    tensor: A tensor of any type.
+
+  Returns:
+    A list of size tensor.shape.ndims containing integers or a scalar tensor.
+  """
+  static_tensor_shape = tensor.shape.as_list()
+  dynamic_tensor_shape = tf.shape(input=tensor)
+  combined_shape = []
+  for index, dim in enumerate(static_tensor_shape):
+    if dim is not None:
+      combined_shape.append(dim)
+    else:
+      combined_shape.append(dynamic_tensor_shape[index])
+  return combined_shape
+
+
+def pad_or_clip_nd(tensor, output_shape):
+  """Pad or Clip given tensor to the output shape.
+
+  Args:
+    tensor: Input tensor to pad or clip.
+    output_shape: A list of integers / scalar tensors (or None for dynamic dim)
+      representing the size to pad or clip each dimension of the input tensor.
+
+  Returns:
+    Input tensor padded and clipped to the output shape.
+  """
+  tensor_shape = tf.shape(input=tensor)
+  clip_size = [
+      tf.where(tensor_shape[i] - shape > 0, shape, -1)
+      if shape is not None else -1 for i, shape in enumerate(output_shape)
+  ]
+  clipped_tensor = tf.slice(
+      tensor,
+      begin=tf.zeros(len(clip_size), dtype=tf.int32),
+      size=clip_size)
+
+  # Pad tensor if the shape of clipped tensor is smaller than the expected
+  # shape.
+  clipped_tensor_shape = tf.shape(input=clipped_tensor)
+  trailing_paddings = [
+      shape - clipped_tensor_shape[i] if shape is not None else 0
+      for i, shape in enumerate(output_shape)
+  ]
+  paddings = tf.stack(
+      [
+          tf.zeros(len(trailing_paddings), dtype=tf.int32),
+          trailing_paddings
+      ],
+      axis=1)
+  padded_tensor = tf.pad(tensor=clipped_tensor, paddings=paddings)
+  output_static_shape = [
+      dim if not isinstance(dim, tf.Tensor) else None for dim in output_shape
+  ]
+  padded_tensor.set_shape(output_static_shape)
+  return padded_tensor
--- a/official/vision/detection/utils/object_detection/target_assigner.py
+++ b/official/vision/detection/utils/object_detection/target_assigner.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base target assigner module.
+
+The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
+groundtruth detections (bounding boxes), to assign classification and regression
+targets to each anchor as well as weights to each anchor (specifying, e.g.,
+which anchors should not contribute to training loss).
+
+It assigns classification/regression targets by performing the following steps:
+1) Computing pairwise similarity between anchors and groundtruth boxes using a
+  provided RegionSimilarity Calculator
+2) Computing a matching based on the similarity matrix using a provided Matcher
+3) Assigning regression targets based on the matching and a provided BoxCoder
+4) Assigning classification targets based on the matching and groundtruth labels
+
+Note that TargetAssigners only operate on detections from a single
+image at a time, so any logic for applying a TargetAssigner to multiple
+images must be handled externally.
+"""
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import box_list
+from official.vision.detection.utils.object_detection import shape_utils
+
+
+KEYPOINTS_FIELD_NAME = 'keypoints'
+
+
+class TargetAssigner(object):
+  """Target assigner to compute classification and regression targets."""
+
+  def __init__(self, similarity_calc, matcher, box_coder,
+               negative_class_weight=1.0, unmatched_cls_target=None):
+    """Construct Object Detection Target Assigner.
+
+    Args:
+      similarity_calc: a RegionSimilarityCalculator
+      matcher: Matcher used to match groundtruth to anchors.
+      box_coder: BoxCoder used to encode matching groundtruth boxes with
+        respect to anchors.
+      negative_class_weight: classification weight to be associated to negative
+        anchors (default: 1.0). The weight must be in [0., 1.].
+      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+        which is consistent with the classification target for each
+        anchor (and can be empty for scalar targets).  This shape must thus be
+        compatible with the groundtruth labels that are passed to the "assign"
+        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+        If set to None, unmatched_cls_target is set to be [0] for each anchor.
+
+    Raises:
+      ValueError: if similarity_calc is not a RegionSimilarityCalculator or
+        if matcher is not a Matcher or if box_coder is not a BoxCoder
+    """
+    self._similarity_calc = similarity_calc
+    self._matcher = matcher
+    self._box_coder = box_coder
+    self._negative_class_weight = negative_class_weight
+    if unmatched_cls_target is None:
+      self._unmatched_cls_target = tf.constant([0], tf.float32)
+    else:
+      self._unmatched_cls_target = unmatched_cls_target
+
+  @property
+  def box_coder(self):
+    return self._box_coder
+
+  def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
+             groundtruth_weights=None, **params):
+    """Assign classification and regression targets to each anchor.
+
+    For a given set of anchors and groundtruth detections, match anchors
+    to groundtruth_boxes and assign classification and regression targets to
+    each anchor as well as weights based on the resulting match (specifying,
+    e.g., which anchors should not contribute to training loss).
+
+    Anchors that are not matched to anything are given a classification target
+    of self._unmatched_cls_target which can be specified via the constructor.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth boxes
+      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
+        to None, groundtruth_labels assumes a binary problem where all
+        ground_truth boxes get a positive label (of 1).
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors match to a particular groundtruth box. The weights
+        must be in [0., 1.]. If None, all weights are set to 1.
+      **params: Additional keyword arguments for specific implementations of
+              the Matcher.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+      cls_weights: a float32 tensor with shape [num_anchors]
+      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
+      reg_weights: a float32 tensor with shape [num_anchors]
+      match: a matcher.Match object encoding the match between anchors and
+        groundtruth boxes, with rows corresponding to groundtruth boxes
+        and columns corresponding to anchors.
+
+    Raises:
+      ValueError: if anchors or groundtruth_boxes are not of type
+        box_list.BoxList
+    """
+    if not isinstance(anchors, box_list.BoxList):
+      raise ValueError('anchors must be an BoxList')
+    if not isinstance(groundtruth_boxes, box_list.BoxList):
+      raise ValueError('groundtruth_boxes must be an BoxList')
+
+    if groundtruth_labels is None:
+      groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
+                                                  0))
+      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
+    unmatched_shape_assert = shape_utils.assert_shape_equal(
+        shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
+        shape_utils.combined_static_and_dynamic_shape(
+            self._unmatched_cls_target))
+    labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
+        shape_utils.combined_static_and_dynamic_shape(
+            groundtruth_labels)[:1],
+        shape_utils.combined_static_and_dynamic_shape(
+            groundtruth_boxes.get())[:1])
+
+    if groundtruth_weights is None:
+      num_gt_boxes = groundtruth_boxes.num_boxes_static()
+      if not num_gt_boxes:
+        num_gt_boxes = groundtruth_boxes.num_boxes()
+      groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
+    with tf.control_dependencies(
+        [unmatched_shape_assert, labels_and_box_shapes_assert]):
+      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
+                                                           anchors)
+      match = self._matcher.match(match_quality_matrix, **params)
+      reg_targets = self._create_regression_targets(anchors,
+                                                    groundtruth_boxes,
+                                                    match)
+      cls_targets = self._create_classification_targets(groundtruth_labels,
+                                                        match)
+      reg_weights = self._create_regression_weights(match, groundtruth_weights)
+      cls_weights = self._create_classification_weights(match,
+                                                        groundtruth_weights)
+
+    num_anchors = anchors.num_boxes_static()
+    if num_anchors is not None:
+      reg_targets = self._reset_target_shape(reg_targets, num_anchors)
+      cls_targets = self._reset_target_shape(cls_targets, num_anchors)
+      reg_weights = self._reset_target_shape(reg_weights, num_anchors)
+      cls_weights = self._reset_target_shape(cls_weights, num_anchors)
+
+    return cls_targets, cls_weights, reg_targets, reg_weights, match
+
+  def _reset_target_shape(self, target, num_anchors):
+    """Sets the static shape of the target.
+
+    Args:
+      target: the target tensor. Its first dimension will be overwritten.
+      num_anchors: the number of anchors, which is used to override the target's
+        first dimension.
+
+    Returns:
+      A tensor with the shape info filled in.
+    """
+    target_shape = target.get_shape().as_list()
+    target_shape[0] = num_anchors
+    target.set_shape(target_shape)
+    return target
+
+  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
+    """Returns a regression target for each anchor.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth_boxes
+      match: a matcher.Match object
+
+    Returns:
+      reg_targets: a float32 tensor with shape [N, box_code_dimension]
+    """
+    matched_gt_boxes = match.gather_based_on_match(
+        groundtruth_boxes.get(),
+        unmatched_value=tf.zeros(4),
+        ignored_value=tf.zeros(4))
+    matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
+    if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
+      groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
+      matched_keypoints = match.gather_based_on_match(
+          groundtruth_keypoints,
+          unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
+          ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
+      matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
+    matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
+    match_results_shape = shape_utils.combined_static_and_dynamic_shape(
+        match.match_results)
+
+    # Zero out the unmatched and ignored regression targets.
+    unmatched_ignored_reg_targets = tf.tile(
+        self._default_regression_target(), [match_results_shape[0], 1])
+    matched_anchors_mask = match.matched_column_indicator()
+    # To broadcast matched_anchors_mask to the same shape as
+    # matched_reg_targets.
+    matched_anchors_mask = tf.tile(
+        tf.expand_dims(matched_anchors_mask, 1),
+        [1, tf.shape(matched_reg_targets)[1]])
+    reg_targets = tf.where(matched_anchors_mask, matched_reg_targets,
+                           unmatched_ignored_reg_targets)
+    return reg_targets
+
+  def _default_regression_target(self):
+    """Returns the default target for anchors to regress to.
+
+    Default regression targets are set to zero (though in
+    this implementation what these targets are set to should
+    not matter as the regression weight of any box set to
+    regress to the default target is zero).
+
+    Returns:
+      default_target: a float32 tensor with shape [1, box_code_dimension]
+    """
+    return tf.constant([self._box_coder.code_size*[0]], tf.float32)
+
+  def _create_classification_targets(self, groundtruth_labels, match):
+    """Create classification targets for each anchor.
+
+    Assign a classification target of for each anchor to the matching
+    groundtruth label that is provided by match.  Anchors that are not matched
+    to anything are given the target self._unmatched_cls_target
+
+    Args:
+      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar labels).
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+
+    Returns:
+      a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
+      subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
+      shape [num_gt_boxes, d_1, d_2, ... d_k].
+    """
+    return match.gather_based_on_match(
+        groundtruth_labels,
+        unmatched_value=self._unmatched_cls_target,
+        ignored_value=self._unmatched_cls_target)
+
+  def _create_regression_weights(self, match, groundtruth_weights):
+    """Set regression weight for each anchor.
+
+    Only positive anchors are set to contribute to the regression loss, so this
+    method returns a weight of 1 for every positive anchor and 0 for every
+    negative anchor.
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors match to a particular groundtruth box.
+
+    Returns:
+      a float32 tensor with shape [num_anchors] representing regression weights.
+    """
+    return match.gather_based_on_match(
+        groundtruth_weights, ignored_value=0., unmatched_value=0.)
+
+  def _create_classification_weights(self,
+                                     match,
+                                     groundtruth_weights):
+    """Create classification weights for each anchor.
+
+    Positive (matched) anchors are associated with a weight of
+    positive_class_weight and negative (unmatched) anchors are associated with
+    a weight of negative_class_weight. When anchors are ignored, weights are set
+    to zero. By default, both positive/negative weights are set to 1.0,
+    but they can be adjusted to handle class imbalance (which is almost always
+    the case in object detection).
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors match to a particular groundtruth box.
+
+    Returns:
+      a float32 tensor with shape [num_anchors] representing classification
+      weights.
+    """
+    return match.gather_based_on_match(
+        groundtruth_weights,
+        ignored_value=0.,
+        unmatched_value=self._negative_class_weight)
+
+  def get_box_coder(self):
+    """Get BoxCoder of this TargetAssigner.
+
+    Returns:
+      BoxCoder object.
+    """
+    return self._box_coder
--- a/official/vision/detection/utils/object_detection/visualization_utils.py
+++ b/official/vision/detection/utils/object_detection/visualization_utils.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A set of functions that are used for visualization.
+
+These functions often receive an image, perform some visualization on the image.
+The functions do not return a value, instead they modify the image itself.
+
+"""
+import collections
+import functools
+# Set headless-friendly backend.
+import matplotlib; matplotlib.use('Agg')  # pylint: disable=multiple-statements
+import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
+import numpy as np
+import PIL.Image as Image
+import PIL.ImageColor as ImageColor
+import PIL.ImageDraw as ImageDraw
+import PIL.ImageFont as ImageFont
+import six
+import tensorflow.compat.v2 as tf
+
+from official.vision.detection.utils.object_detection import shape_utils
+
+
+_TITLE_LEFT_MARGIN = 10
+_TITLE_TOP_MARGIN = 10
+STANDARD_COLORS = [
+    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
+    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
+    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
+    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
+    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
+    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
+    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
+    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
+    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
+    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
+    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
+    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
+    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
+    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
+    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
+    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
+    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
+    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
+    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
+    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
+    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
+    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
+    'WhiteSmoke', 'Yellow', 'YellowGreen'
+]
+
+
+def save_image_array_as_png(image, output_path):
+  """Saves an image (represented as a numpy array) to PNG.
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+    output_path: path to which image should be written.
+  """
+  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
+  with tf.io.gfile.GFile(output_path, 'w') as fid:
+    image_pil.save(fid, 'PNG')
+
+
+def encode_image_array_as_png_str(image):
+  """Encodes a numpy array into a PNG string.
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+
+  Returns:
+    PNG encoded image string.
+  """
+  image_pil = Image.fromarray(np.uint8(image))
+  output = six.BytesIO()
+  image_pil.save(output, format='PNG')
+  png_string = output.getvalue()
+  output.close()
+  return png_string
+
+
+def draw_bounding_box_on_image_array(image,
+                                     ymin,
+                                     xmin,
+                                     ymax,
+                                     xmax,
+                                     color='red',
+                                     thickness=4,
+                                     display_str_list=(),
+                                     use_normalized_coordinates=True):
+  """Adds a bounding box to an image (numpy array).
+
+  Bounding box coordinates can be specified in either absolute (pixel) or
+  normalized coordinates by setting the use_normalized_coordinates argument.
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+    ymin: ymin of bounding box.
+    xmin: xmin of bounding box.
+    ymax: ymax of bounding box.
+    xmax: xmax of bounding box.
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list: list of strings to display in box
+                      (each to be shown on its own line).
+    use_normalized_coordinates: If True (default), treat coordinates
+      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
+      coordinates as absolute.
+  """
+  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
+  draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
+                             thickness, display_str_list,
+                             use_normalized_coordinates)
+  np.copyto(image, np.array(image_pil))
+
+
+def draw_bounding_box_on_image(image,
+                               ymin,
+                               xmin,
+                               ymax,
+                               xmax,
+                               color='red',
+                               thickness=4,
+                               display_str_list=(),
+                               use_normalized_coordinates=True):
+  """Adds a bounding box to an image.
+
+  Bounding box coordinates can be specified in either absolute (pixel) or
+  normalized coordinates by setting the use_normalized_coordinates argument.
+
+  Each string in display_str_list is displayed on a separate line above the
+  bounding box in black text on a rectangle filled with the input 'color'.
+  If the top of the bounding box extends to the edge of the image, the strings
+  are displayed below the bounding box.
+
+  Args:
+    image: a PIL.Image object.
+    ymin: ymin of bounding box.
+    xmin: xmin of bounding box.
+    ymax: ymax of bounding box.
+    xmax: xmax of bounding box.
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list: list of strings to display in box
+                      (each to be shown on its own line).
+    use_normalized_coordinates: If True (default), treat coordinates
+      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
+      coordinates as absolute.
+  """
+  draw = ImageDraw.Draw(image)
+  im_width, im_height = image.size
+  if use_normalized_coordinates:
+    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
+                                  ymin * im_height, ymax * im_height)
+  else:
+    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
+  draw.line([(left, top), (left, bottom), (right, bottom),
+             (right, top), (left, top)], width=thickness, fill=color)
+  try:
+    font = ImageFont.truetype('arial.ttf', 24)
+  except IOError:
+    font = ImageFont.load_default()
+
+  # If the total height of the display strings added to the top of the bounding
+  # box exceeds the top of the image, stack the strings below the bounding box
+  # instead of above.
+  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
+  # Each display_str has a top and bottom margin of 0.05x.
+  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
+
+  if top > total_display_str_height:
+    text_bottom = top
+  else:
+    text_bottom = bottom + total_display_str_height
+  # Reverse list and print from bottom to top.
+  for display_str in display_str_list[::-1]:
+    text_width, text_height = font.getsize(display_str)
+    margin = np.ceil(0.05 * text_height)
+    draw.rectangle(
+        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
+                                                          text_bottom)],
+        fill=color)
+    draw.text(
+        (left + margin, text_bottom - text_height - margin),
+        display_str,
+        fill='black',
+        font=font)
+    text_bottom -= text_height - 2 * margin
+
+
+def draw_bounding_boxes_on_image_array(image,
+                                       boxes,
+                                       color='red',
+                                       thickness=4,
+                                       display_str_list_list=()):
+  """Draws bounding boxes on image (numpy array).
+
+  Args:
+    image: a numpy array object.
+    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
+           The coordinates are in normalized format between [0, 1].
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list_list: list of list of strings.
+                           a list of strings for each bounding box.
+                           The reason to pass a list of strings for a
+                           bounding box is that it might contain
+                           multiple labels.
+
+  Raises:
+    ValueError: if boxes is not a [N, 4] array
+  """
+  image_pil = Image.fromarray(image)
+  draw_bounding_boxes_on_image(image_pil, boxes, color, thickness,
+                               display_str_list_list)
+  np.copyto(image, np.array(image_pil))
+
+
+def draw_bounding_boxes_on_image(image,
+                                 boxes,
+                                 color='red',
+                                 thickness=4,
+                                 display_str_list_list=()):
+  """Draws bounding boxes on image.
+
+  Args:
+    image: a PIL.Image object.
+    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
+           The coordinates are in normalized format between [0, 1].
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list_list: list of list of strings.
+                           a list of strings for each bounding box.
+                           The reason to pass a list of strings for a
+                           bounding box is that it might contain
+                           multiple labels.
+
+  Raises:
+    ValueError: if boxes is not a [N, 4] array
+  """
+  boxes_shape = boxes.shape
+  if not boxes_shape:
+    return
+  if len(boxes_shape) != 2 or boxes_shape[1] != 4:
+    raise ValueError('Input must be of size [N, 4]')
+  for i in range(boxes_shape[0]):
+    display_str_list = ()
+    if display_str_list_list:
+      display_str_list = display_str_list_list[i]
+    draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
+                               boxes[i, 3], color, thickness, display_str_list)
+
+
+def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs):
+  return visualize_boxes_and_labels_on_image_array(
+      image, boxes, classes, scores, category_index=category_index, **kwargs)
+
+
+def _visualize_boxes_and_masks(image, boxes, classes, scores, masks,
+                               category_index, **kwargs):
+  return visualize_boxes_and_labels_on_image_array(
+      image,
+      boxes,
+      classes,
+      scores,
+      category_index=category_index,
+      instance_masks=masks,
+      **kwargs)
+
+
+def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints,
+                                   category_index, **kwargs):
+  return visualize_boxes_and_labels_on_image_array(
+      image,
+      boxes,
+      classes,
+      scores,
+      category_index=category_index,
+      keypoints=keypoints,
+      **kwargs)
+
+
+def _visualize_boxes_and_masks_and_keypoints(
+    image, boxes, classes, scores, masks, keypoints, category_index, **kwargs):
+  return visualize_boxes_and_labels_on_image_array(
+      image,
+      boxes,
+      classes,
+      scores,
+      category_index=category_index,
+      instance_masks=masks,
+      keypoints=keypoints,
+      **kwargs)
+
+
+def _resize_original_image(image, image_shape):
+  image = tf.expand_dims(image, 0)
+  image = tf.image.resize(
+      image, image_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
+  return tf.cast(tf.squeeze(image, 0), tf.uint8)
+
+
+def draw_bounding_boxes_on_image_tensors(images,
+                                         boxes,
+                                         classes,
+                                         scores,
+                                         category_index,
+                                         original_image_spatial_shape=None,
+                                         true_image_shape=None,
+                                         instance_masks=None,
+                                         keypoints=None,
+                                         max_boxes_to_draw=20,
+                                         min_score_thresh=0.2,
+                                         use_normalized_coordinates=True):
+  """Draws bounding boxes, masks, and keypoints on batch of image tensors.
+
+  Args:
+    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
+      channels will be ignored. If C = 1, then we convert the images to RGB
+      images.
+    boxes: [N, max_detections, 4] float32 tensor of detection boxes.
+    classes: [N, max_detections] int tensor of detection classes. Note that
+      classes are 1-indexed.
+    scores: [N, max_detections] float32 tensor of detection scores.
+    category_index: a dict that maps integer ids to category dicts. e.g.
+      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
+    original_image_spatial_shape: [N, 2] tensor containing the spatial size of
+      the original image.
+    true_image_shape: [N, 3] tensor containing the spatial size of unpadded
+      original_image.
+    instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
+      instance masks.
+    keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
+      with keypoints.
+    max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
+    min_score_thresh: Minimum score threshold for visualization. Default 0.2.
+    use_normalized_coordinates: Whether to assume boxes and kepoints are in
+      normalized coordinates (as opposed to absolute coordiantes).
+      Default is True.
+
+  Returns:
+    4D image tensor of type uint8, with boxes drawn on top.
+  """
+  # Additional channels are being ignored.
+  if images.shape[3] > 3:
+    images = images[:, :, :, 0:3]
+  elif images.shape[3] == 1:
+    images = tf.image.grayscale_to_rgb(images)
+  visualization_keyword_args = {
+      'use_normalized_coordinates': use_normalized_coordinates,
+      'max_boxes_to_draw': max_boxes_to_draw,
+      'min_score_thresh': min_score_thresh,
+      'agnostic_mode': False,
+      'line_thickness': 4
+  }
+  if true_image_shape is None:
+    true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3])
+  else:
+    true_shapes = true_image_shape
+  if original_image_spatial_shape is None:
+    original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2])
+  else:
+    original_shapes = original_image_spatial_shape
+
+  if instance_masks is not None and keypoints is None:
+    visualize_boxes_fn = functools.partial(
+        _visualize_boxes_and_masks,
+        category_index=category_index,
+        **visualization_keyword_args)
+    elems = [
+        true_shapes, original_shapes, images, boxes, classes, scores,
+        instance_masks
+    ]
+  elif instance_masks is None and keypoints is not None:
+    visualize_boxes_fn = functools.partial(
+        _visualize_boxes_and_keypoints,
+        category_index=category_index,
+        **visualization_keyword_args)
+    elems = [
+        true_shapes, original_shapes, images, boxes, classes, scores, keypoints
+    ]
+  elif instance_masks is not None and keypoints is not None:
+    visualize_boxes_fn = functools.partial(
+        _visualize_boxes_and_masks_and_keypoints,
+        category_index=category_index,
+        **visualization_keyword_args)
+    elems = [
+        true_shapes, original_shapes, images, boxes, classes, scores,
+        instance_masks, keypoints
+    ]
+  else:
+    visualize_boxes_fn = functools.partial(
+        _visualize_boxes,
+        category_index=category_index,
+        **visualization_keyword_args)
+    elems = [
+        true_shapes, original_shapes, images, boxes, classes, scores
+    ]
+
+  def draw_boxes(image_and_detections):
+    """Draws boxes on image."""
+    true_shape = image_and_detections[0]
+    original_shape = image_and_detections[1]
+    if true_image_shape is not None:
+      image = shape_utils.pad_or_clip_nd(
+          image_and_detections[2], [true_shape[0], true_shape[1], 3])
+    if original_image_spatial_shape is not None:
+      image_and_detections[2] = _resize_original_image(image, original_shape)
+
+    image_with_boxes = tf.compat.v1.py_func(visualize_boxes_fn,
+                                            image_and_detections[2:], tf.uint8)
+    return image_with_boxes
+
+  images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
+  return images
+
+
+def draw_keypoints_on_image_array(image,
+                                  keypoints,
+                                  color='red',
+                                  radius=2,
+                                  use_normalized_coordinates=True):
+  """Draws keypoints on an image (numpy array).
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+    keypoints: a numpy array with shape [num_keypoints, 2].
+    color: color to draw the keypoints with. Default is red.
+    radius: keypoint radius. Default value is 2.
+    use_normalized_coordinates: if True (default), treat keypoint values as
+      relative to the image.  Otherwise treat them as absolute.
+  """
+  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
+  draw_keypoints_on_image(image_pil, keypoints, color, radius,
+                          use_normalized_coordinates)
+  np.copyto(image, np.array(image_pil))
+
+
+def draw_keypoints_on_image(image,
+                            keypoints,
+                            color='red',
+                            radius=2,
+                            use_normalized_coordinates=True):
+  """Draws keypoints on an image.
+
+  Args:
+    image: a PIL.Image object.
+    keypoints: a numpy array with shape [num_keypoints, 2].
+    color: color to draw the keypoints with. Default is red.
+    radius: keypoint radius. Default value is 2.
+    use_normalized_coordinates: if True (default), treat keypoint values as
+      relative to the image.  Otherwise treat them as absolute.
+  """
+  draw = ImageDraw.Draw(image)
+  im_width, im_height = image.size
+  keypoints_x = [k[1] for k in keypoints]
+  keypoints_y = [k[0] for k in keypoints]
+  if use_normalized_coordinates:
+    keypoints_x = tuple([im_width * x for x in keypoints_x])
+    keypoints_y = tuple([im_height * y for y in keypoints_y])
+  for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
+    draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
+                  (keypoint_x + radius, keypoint_y + radius)],
+                 outline=color, fill=color)
+
+
+def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
+  """Draws mask on an image.
+
+  Args:
+    image: uint8 numpy array with shape (img_height, img_height, 3)
+    mask: a uint8 numpy array of shape (img_height, img_height) with
+      values between either 0 or 1.
+    color: color to draw the keypoints with. Default is red.
+    alpha: transparency value between 0 and 1. (default: 0.4)
+
+  Raises:
+    ValueError: On incorrect data type for image or masks.
+  """
+  if image.dtype != np.uint8:
+    raise ValueError('`image` not of type np.uint8')
+  if mask.dtype != np.uint8:
+    raise ValueError('`mask` not of type np.uint8')
+  if np.any(np.logical_and(mask != 1, mask != 0)):
+    raise ValueError('`mask` elements should be in [0, 1]')
+  if image.shape[:2] != mask.shape:
+    raise ValueError('The image has spatial dimensions %s but the mask has '
+                     'dimensions %s' % (image.shape[:2], mask.shape))
+  rgb = ImageColor.getrgb(color)
+  pil_image = Image.fromarray(image)
+
+  solid_color = np.expand_dims(
+      np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
+  pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
+  pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
+  pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
+  np.copyto(image, np.array(pil_image.convert('RGB')))
+
+
+def visualize_boxes_and_labels_on_image_array(
+    image,
+    boxes,
+    classes,
+    scores,
+    category_index,
+    instance_masks=None,
+    instance_boundaries=None,
+    keypoints=None,
+    use_normalized_coordinates=False,
+    max_boxes_to_draw=20,
+    min_score_thresh=.5,
+    agnostic_mode=False,
+    line_thickness=4,
+    groundtruth_box_visualization_color='black',
+    skip_scores=False,
+    skip_labels=False):
+  """Overlay labeled boxes on an image with formatted scores and label names.
+
+  This function groups boxes that correspond to the same location
+  and creates a display string for each detection and overlays these
+  on the image. Note that this function modifies the image in place, and returns
+  that same image.
+
+  Args:
+    image: uint8 numpy array with shape (img_height, img_width, 3)
+    boxes: a numpy array of shape [N, 4]
+    classes: a numpy array of shape [N]. Note that class indices are 1-based,
+      and match the keys in the label map.
+    scores: a numpy array of shape [N] or None.  If scores=None, then
+      this function assumes that the boxes to be plotted are groundtruth
+      boxes and plot all boxes as black with no classes or scores.
+    category_index: a dict containing category dictionaries (each holding
+      category index `id` and category name `name`) keyed by category indices.
+    instance_masks: a numpy array of shape [N, image_height, image_width] with
+      values ranging between 0 and 1, can be None.
+    instance_boundaries: a numpy array of shape [N, image_height, image_width]
+      with values ranging between 0 and 1, can be None.
+    keypoints: a numpy array of shape [N, num_keypoints, 2], can
+      be None
+    use_normalized_coordinates: whether boxes is to be interpreted as
+      normalized coordinates or not.
+    max_boxes_to_draw: maximum number of boxes to visualize.  If None, draw
+      all boxes.
+    min_score_thresh: minimum score threshold for a box to be visualized
+    agnostic_mode: boolean (default: False) controlling whether to evaluate in
+      class-agnostic mode or not.  This mode will display scores but ignore
+      classes.
+    line_thickness: integer (default: 4) controlling line width of the boxes.
+    groundtruth_box_visualization_color: box color for visualizing groundtruth
+      boxes
+    skip_scores: whether to skip score when drawing a single detection
+    skip_labels: whether to skip label when drawing a single detection
+
+  Returns:
+    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
+  """
+  # Create a display string (and color) for every box location, group any boxes
+  # that correspond to the same location.
+  box_to_display_str_map = collections.defaultdict(list)
+  box_to_color_map = collections.defaultdict(str)
+  box_to_instance_masks_map = {}
+  box_to_instance_boundaries_map = {}
+  box_to_keypoints_map = collections.defaultdict(list)
+  if not max_boxes_to_draw:
+    max_boxes_to_draw = boxes.shape[0]
+  for i in range(min(max_boxes_to_draw, boxes.shape[0])):
+    if scores is None or scores[i] > min_score_thresh:
+      box = tuple(boxes[i].tolist())
+      if instance_masks is not None:
+        box_to_instance_masks_map[box] = instance_masks[i]
+      if instance_boundaries is not None:
+        box_to_instance_boundaries_map[box] = instance_boundaries[i]
+      if keypoints is not None:
+        box_to_keypoints_map[box].extend(keypoints[i])
+      if scores is None:
+        box_to_color_map[box] = groundtruth_box_visualization_color
+      else:
+        display_str = ''
+        if not skip_labels:
+          if not agnostic_mode:
+            if classes[i] in category_index.keys():
+              class_name = category_index[classes[i]]['name']
+            else:
+              class_name = 'N/A'
+            display_str = str(class_name)
+        if not skip_scores:
+          if not display_str:
+            display_str = '{}%'.format(int(100*scores[i]))
+          else:
+            display_str = '{}: {}%'.format(display_str, int(100*scores[i]))
+        box_to_display_str_map[box].append(display_str)
+        if agnostic_mode:
+          box_to_color_map[box] = 'DarkOrange'
+        else:
+          box_to_color_map[box] = STANDARD_COLORS[
+              classes[i] % len(STANDARD_COLORS)]
+
+  # Draw all boxes onto image.
+  for box, color in box_to_color_map.items():
+    ymin, xmin, ymax, xmax = box
+    if instance_masks is not None:
+      draw_mask_on_image_array(
+          image,
+          box_to_instance_masks_map[box],
+          color=color
+      )
+    if instance_boundaries is not None:
+      draw_mask_on_image_array(
+          image,
+          box_to_instance_boundaries_map[box],
+          color='red',
+          alpha=1.0
+      )
+    draw_bounding_box_on_image_array(
+        image,
+        ymin,
+        xmin,
+        ymax,
+        xmax,
+        color=color,
+        thickness=line_thickness,
+        display_str_list=box_to_display_str_map[box],
+        use_normalized_coordinates=use_normalized_coordinates)
+    if keypoints is not None:
+      draw_keypoints_on_image_array(
+          image,
+          box_to_keypoints_map[box],
+          color=color,
+          radius=line_thickness / 2,
+          use_normalized_coordinates=use_normalized_coordinates)
+
+  return image
+
+
+def add_cdf_image_summary(values, name):
+  """Adds a tf.summary.image for a CDF plot of the values.
+
+  Normalizes `values` such that they sum to 1, plots the cumulative distribution
+  function and creates a tf image summary.
+
+  Args:
+    values: a 1-D float32 tensor containing the values.
+    name: name for the image summary.
+  """
+  def cdf_plot(values):
+    """Numpy function to plot CDF."""
+    normalized_values = values / np.sum(values)
+    sorted_values = np.sort(normalized_values)
+    cumulative_values = np.cumsum(sorted_values)
+    fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
+                            / cumulative_values.size)
+    fig = plt.figure(frameon=False)
+    ax = fig.add_subplot('111')
+    ax.plot(fraction_of_examples, cumulative_values)
+    ax.set_ylabel('cumulative normalized values')
+    ax.set_xlabel('fraction of examples')
+    fig.canvas.draw()
+    width, height = fig.get_size_inches() * fig.get_dpi()
+    image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
+        1, int(height), int(width), 3)
+    return image
+
+  cdf_plot = tf.compat.v1.py_func(cdf_plot, [values], tf.uint8)
+  tf.compat.v1.summary.image(name, cdf_plot)
+
+
+def add_hist_image_summary(values, bins, name):
+  """Adds a tf.summary.image for a histogram plot of the values.
+
+  Plots the histogram of values and creates a tf image summary.
+
+  Args:
+    values: a 1-D float32 tensor containing the values.
+    bins: bin edges which will be directly passed to np.histogram.
+    name: name for the image summary.
+  """
+
+  def hist_plot(values, bins):
+    """Numpy function to plot hist."""
+    fig = plt.figure(frameon=False)
+    ax = fig.add_subplot('111')
+    y, x = np.histogram(values, bins=bins)
+    ax.plot(x[:-1], y)
+    ax.set_ylabel('count')
+    ax.set_xlabel('value')
+    fig.canvas.draw()
+    width, height = fig.get_size_inches() * fig.get_dpi()
+    image = np.fromstring(
+        fig.canvas.tostring_rgb(), dtype='uint8').reshape(
+            1, int(height), int(width), 3)
+    return image
+
+  hist_plot = tf.compat.v1.py_func(hist_plot, [values, bins], tf.uint8)
+  tf.compat.v1.summary.image(name, hist_plot)
--- a/official/vision/detection/utils/segm_utils.py
+++ b/official/vision/detection/utils/segm_utils.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility functions for segmentations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import cv2
+
+
+def segm_results(masks, detections, image_height, image_width):
+  """Generates segmentation results."""
+
+  def expand_boxes(boxes, scale):
+    """Expands an array of boxes by a given scale."""
+    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227  # pylint: disable=line-too-long
+    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
+    # whereas `boxes` here is in [x1, y1, w, h] form
+    w_half = boxes[:, 2] * .5
+    h_half = boxes[:, 3] * .5
+    x_c = boxes[:, 0] + w_half
+    y_c = boxes[:, 1] + h_half
+
+    w_half *= scale
+    h_half *= scale
+
+    boxes_exp = np.zeros(boxes.shape)
+    boxes_exp[:, 0] = x_c - w_half
+    boxes_exp[:, 2] = x_c + w_half
+    boxes_exp[:, 1] = y_c - h_half
+    boxes_exp[:, 3] = y_c + h_half
+
+    return boxes_exp
+
+  # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812  # pylint: disable=line-too-long
+  # To work around an issue with cv2.resize (it seems to automatically pad
+  # with repeated border values), we manually zero-pad the masks by 1 pixel
+  # prior to resizing back to the original image resolution. This prevents
+  # "top hat" artifacts. We therefore need to expand the reference boxes by an
+  # appropriate factor.
+  mask_size = masks.shape[2]
+  scale = (mask_size + 2.0) / mask_size
+
+  ref_boxes = expand_boxes(detections[:, 1:5], scale)
+  ref_boxes = ref_boxes.astype(np.int32)
+  padded_mask = np.zeros((mask_size + 2, mask_size + 2), dtype=np.float32)
+  segms = []
+  for mask_ind, mask in enumerate(masks):
+    padded_mask[1:-1, 1:-1] = mask[:, :]
+
+    ref_box = ref_boxes[mask_ind, :]
+    w = ref_box[2] - ref_box[0] + 1
+    h = ref_box[3] - ref_box[1] + 1
+    w = np.maximum(w, 1)
+    h = np.maximum(h, 1)
+
+    mask = cv2.resize(padded_mask, (w, h))
+    mask = np.array(mask > 0.5, dtype=np.uint8)
+    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
+
+    x_0 = max(ref_box[0], 0)
+    x_1 = min(ref_box[2] + 1, image_width)
+    y_0 = max(ref_box[1], 0)
+    y_1 = min(ref_box[3] + 1, image_height)
+
+    im_mask[y_0:y_1, x_0:x_1] = mask[
+        (y_0 - ref_box[1]):(y_1 - ref_box[1]),
+        (x_0 - ref_box[0]):(x_1 - ref_box[0])
+    ]
+    segms.append(im_mask)
+
+  segms = np.array(segms)
+  assert masks.shape[0] == segms.shape[0]
+  return segms
+
--- a/official/vision/detection/utils/spatial_transform.py
+++ b/official/vision/detection/utils/spatial_transform.py
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions to performa spatial transformation for Tensor."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v2 as tf
+
+
+_EPSILON = 1e-8
+
+
+def nearest_upsampling(data, scale):
+  """Nearest neighbor upsampling implementation.
+
+  Args:
+    data: A tensor with a shape of [batch, height_in, width_in, channels].
+    scale: An integer multiple to scale resolution of input data.
+  Returns:
+    data_up: A tensor with a shape of
+      [batch, height_in*scale, width_in*scale, channels]. Same dtype as input
+      data.
+  """
+  with tf.name_scope('nearest_upsampling'):
+    bs, _, _, c = data.get_shape().as_list()
+    shape = tf.shape(input=data)
+    h = shape[1]
+    w = shape[2]
+    bs = -1 if bs is None else bs
+    # Uses reshape to quickly upsample the input.  The nearest pixel is selected
+    # implicitly via broadcasting.
+    data = tf.reshape(data, [bs, h, 1, w, 1, c]) * tf.ones(
+        [1, 1, scale, 1, scale, 1], dtype=data.dtype)
+    return tf.reshape(data, [bs, h * scale, w * scale, c])
+
+
+def selective_crop_and_resize(features,
+                              boxes,
+                              box_levels,
+                              boundaries,
+                              output_size=7,
+                              sample_offset=0.5):
+  """Crop and resize boxes on a set of feature maps.
+
+  Given multiple features maps indexed by different levels, and a set of boxes
+  where each box is mapped to a certain level, it selectively crops and resizes
+  boxes from the corresponding feature maps to generate the box features.
+
+  We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
+  figure 3 for reference). Specifically, for each feature map, we select an
+  (output_size, output_size) set of pixels corresponding to the box location,
+  and then use bilinear interpolation to select the feature value for each
+  pixel.
+
+  For performance, we perform the gather and interpolation on all layers as a
+  single operation. This is op the multi-level features are first stacked and
+  gathered into [2*output_size, 2*output_size] feature points. Then bilinear
+  interpolation is performed on the gathered feature points to generate
+  [output_size, output_size] RoIAlign feature map.
+
+  Here is the step-by-step algorithm:
+    1. The multi-level features are gathered into a
+       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
+       Tensor. The Tensor contains four neighboring feature points for each
+       vertice in the output grid.
+    2. Compute the interpolation kernel of shape
+       [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axis
+       can be seen as stacking 2x2 interpolation kernels for all vertices in the
+       output grid.
+    3. Element-wise multiply the gathered features and interpolation kernel.
+       Then apply 2x2 average pooling to reduce spatial dimension to
+       output_size.
+
+  Args:
+    features: a 5-D tensor of shape
+      [batch_size, num_levels, max_height, max_width, num_filters] where
+      cropping and resizing are based.
+    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
+      information of each box w.r.t. the corresponding feature map.
+      boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
+      corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
+      in terms of the number of pixels of the corresponding feature map size.
+    box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing
+      the 0-based corresponding feature level index of each box.
+    boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
+      the boundary (in (y, x)) of the corresponding feature map for each box.
+      Any resampled grid points that go beyond the bounary will be clipped.
+    output_size: a scalar indicating the output crop size.
+    sample_offset: a float number in [0, 1] indicates the subpixel sample offset
+      from grid point.
+
+  Returns:
+    features_per_box: a 5-D tensor of shape
+      [batch_size, num_boxes, output_size, output_size, num_filters]
+      representing the cropped features.
+  """
+  (batch_size, num_levels, max_feature_height, max_feature_width,
+   num_filters) = features.get_shape().as_list()
+  _, num_boxes, _ = boxes.get_shape().as_list()
+
+  # Compute the grid position w.r.t. the corresponding feature map.
+  box_grid_x = []
+  box_grid_y = []
+  for i in range(output_size):
+    box_grid_x.append(boxes[:, :, 1] +
+                      (i + sample_offset) * boxes[:, :, 3] / output_size)
+    box_grid_y.append(boxes[:, :, 0] +
+                      (i + sample_offset) * boxes[:, :, 2] / output_size)
+  box_grid_x = tf.stack(box_grid_x, axis=2)
+  box_grid_y = tf.stack(box_grid_y, axis=2)
+
+  # Compute indices for gather operation.
+  box_grid_y0 = tf.floor(box_grid_y)
+  box_grid_x0 = tf.floor(box_grid_x)
+  box_grid_x0 = tf.maximum(0., box_grid_x0)
+  box_grid_y0 = tf.maximum(0., box_grid_y0)
+  box_gridx0x1 = tf.stack(
+      [tf.minimum(box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)),
+       tf.minimum(box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1))],
+      axis=3)
+  box_gridy0y1 = tf.stack(
+      [tf.minimum(box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)),
+       tf.minimum(box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1))],
+      axis=3)
+
+  x_indices = tf.cast(
+      tf.reshape(box_gridx0x1,
+                 [batch_size, num_boxes, output_size * 2]), dtype=tf.int32)
+  y_indices = tf.cast(
+      tf.reshape(box_gridy0y1,
+                 [batch_size, num_boxes, output_size * 2]), dtype=tf.int32)
+
+  height_dim_offset = max_feature_width
+  level_dim_offset = max_feature_height * height_dim_offset
+  batch_dim_offset = num_levels * level_dim_offset
+  indices = tf.reshape(
+      tf.tile(tf.reshape(tf.range(batch_size) * batch_dim_offset,
+                         [batch_size, 1, 1, 1]),
+              [1, num_boxes, output_size * 2, output_size * 2]) +
+      tf.tile(tf.reshape(box_levels * level_dim_offset,
+                         [batch_size, num_boxes, 1, 1]),
+              [1, 1, output_size * 2, output_size * 2]) +
+      tf.tile(tf.reshape(y_indices * height_dim_offset,
+                         [batch_size, num_boxes, output_size * 2, 1]),
+              [1, 1, 1, output_size * 2]) +
+      tf.tile(tf.reshape(x_indices,
+                         [batch_size, num_boxes, 1, output_size * 2]),
+              [1, 1, output_size * 2, 1]), [-1])
+
+  features = tf.reshape(features, [-1, num_filters])
+  features_per_box = tf.reshape(
+      tf.gather(features, indices),
+      [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters])
+
+  # The RoIAlign feature f can be computed by bilinear interpolation of four
+  # neighboring feature points f0, f1, f2, and f3.
+  # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
+  #                       [f10, f11]]
+  # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
+  # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
+  ly = box_grid_y - box_grid_y0
+  lx = box_grid_x - box_grid_x0
+  hy = 1.0 - ly
+  hx = 1.0 - lx
+  kernel_x = tf.reshape(tf.stack([hx, lx], axis=3),
+                        [batch_size, num_boxes, 1, output_size*2])
+  kernel_y = tf.reshape(tf.stack([hy, ly], axis=3),
+                        [batch_size, num_boxes, output_size*2, 1])
+  # Uses implicit broadcast to generate the interpolation kernel. The
+  # multiplier `4` is for avg pooling.
+  interpolation_kernel = kernel_y * kernel_x * 4
+
+  # Interpolates the gathered features with computed interpolation kernels.
+  features_per_box *= tf.cast(
+      tf.expand_dims(interpolation_kernel, axis=4),
+      dtype=features_per_box.dtype)
+  features_per_box = tf.reshape(
+      features_per_box,
+      [batch_size * num_boxes, output_size*2, output_size*2, num_filters])
+  features_per_box = tf.nn.avg_pool2d(
+      input=features_per_box,
+      ksize=[1, 2, 2, 1],
+      strides=[1, 2, 2, 1],
+      padding='VALID')
+  features_per_box = tf.reshape(
+      features_per_box,
+      [batch_size, num_boxes, output_size, output_size, num_filters])
+
+  return features_per_box
+
+
+def multilevel_crop_and_resize(features, boxes, output_size=7):
+  """Crop and resize on multilevel feature pyramid.
+
+  Generate the (output_size, output_size) set of pixels for each input box
+  by first locating the box into the correct feature level, and then cropping
+  and resizing it using the correspoding feature map of that level.
+
+  Args:
+    features: A dictionary with key as pyramid level and value as features.
+      The features are in shape of [batch_size, height_l, width_l, num_filters].
+    boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
+      represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
+    output_size: A scalar to indicate the output crop size.
+
+  Returns:
+    A 5-D tensor representing feature crop of shape
+    [batch_size, num_boxes, output_size, output_size, num_filters].
+  """
+  with tf.name_scope('multilevel_crop_and_resize'):
+    levels = features.keys()
+    min_level = min(levels)
+    max_level = max(levels)
+    _, max_feature_height, max_feature_width, _ = (
+        features[min_level].get_shape().as_list())
+    # Stacks feature pyramid into a features_all of shape
+    # [batch_size, levels, height, width, num_filters].
+    features_all = []
+    for level in range(min_level, max_level + 1):
+      features_all.append(tf.image.pad_to_bounding_box(
+          features[level], 0, 0, max_feature_height, max_feature_width))
+    features_all = tf.stack(features_all, axis=1)
+
+    # Assigns boxes to the right level.
+    box_width = boxes[:, :, 3] - boxes[:, :, 1]
+    box_height = boxes[:, :, 2] - boxes[:, :, 0]
+    areas_sqrt = tf.sqrt(box_height * box_width)
+    levels = tf.cast(
+        tf.math.floordiv(
+            tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0))
+        + 4.0,
+        dtype=tf.int32)
+    # Maps levels between [min_level, max_level].
+    levels = tf.minimum(max_level, tf.maximum(levels, min_level))
+
+    # Projects box location and sizes to corresponding feature levels.
+    scale_to_level = tf.cast(
+        tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)),
+        dtype=boxes.dtype)
+    boxes /= tf.expand_dims(scale_to_level, axis=2)
+    box_width /= scale_to_level
+    box_height /= scale_to_level
+    boxes = tf.concat([boxes[:, :, 0:2],
+                       tf.expand_dims(box_height, -1),
+                       tf.expand_dims(box_width, -1)], axis=-1)
+
+    # Maps levels to [0, max_level-min_level].
+    levels -= min_level
+    level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
+    boundary = tf.cast(
+        tf.concat([
+            tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] /
+                           level_strides - 1,
+                           axis=-1),
+            tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] /
+                           level_strides - 1,
+                           axis=-1),
+        ], axis=-1),
+        boxes.dtype)
+
+    return selective_crop_and_resize(
+        features_all, boxes, levels, boundary, output_size)
+
+
+def single_level_feature_crop(features, level_boxes, detection_prior_levels,
+                              min_mask_level, mask_crop_size):
+  """Crop the FPN features at the appropriate levels for each detection.
+
+
+  Args:
+    features: a float tensor of shape [batch_size, num_levels,
+      max_feature_size, max_feature_size, num_downsample_channels].
+    level_boxes: a float Tensor of the level boxes to crop from.
+        [batch_size, num_instances, 4].
+    detection_prior_levels: an int Tensor of instance assigned level of shape
+        [batch_size, num_instances].
+    min_mask_level: minimum FPN level to crop mask feature from.
+    mask_crop_size: an int of mask crop size.
+
+  Returns:
+    crop_features: a float Tensor of shape [batch_size * num_instances,
+        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
+        instance feature crop.
+  """
+  (batch_size, num_levels, max_feature_size,
+   _, num_downsample_channels) = features.get_shape().as_list()
+  _, num_of_instances, _ = level_boxes.get_shape().as_list()
+  level_boxes = tf.cast(level_boxes, tf.int32)
+  assert num_of_instances == detection_prior_levels.get_shape().as_list()[1]
+
+  x_start_indices = level_boxes[:, :, 1]
+  y_start_indices = level_boxes[:, :, 0]
+  # generate the full indices (not just the starting index)
+  x_idx_list = []
+  y_idx_list = []
+  for i in range(mask_crop_size):
+    x_idx_list.append(x_start_indices + i)
+    y_idx_list.append(y_start_indices + i)
+
+  x_indices = tf.stack(x_idx_list, axis=2)
+  y_indices = tf.stack(y_idx_list, axis=2)
+  levels = detection_prior_levels - min_mask_level
+  height_dim_size = max_feature_size
+  level_dim_size = max_feature_size * height_dim_size
+  batch_dim_size = num_levels * level_dim_size
+  # TODO(weicheng) change this to gather_nd for better readability.
+  indices = tf.reshape(
+      tf.tile(
+          tf.reshape(
+              tf.range(batch_size) * batch_dim_size,
+              [batch_size, 1, 1, 1]),
+          [1, num_of_instances,
+           mask_crop_size, mask_crop_size]) +
+      tf.tile(
+          tf.reshape(levels * level_dim_size,
+                     [batch_size, num_of_instances, 1, 1]),
+          [1, 1, mask_crop_size, mask_crop_size]) +
+      tf.tile(
+          tf.reshape(y_indices * height_dim_size,
+                     [batch_size, num_of_instances,
+                      mask_crop_size, 1]),
+          [1, 1, 1, mask_crop_size]) +
+      tf.tile(
+          tf.reshape(x_indices,
+                     [batch_size, num_of_instances,
+                      1, mask_crop_size]),
+          [1, 1, mask_crop_size, 1]), [-1])
+
+  features_r2 = tf.reshape(features,
+                           [-1, num_downsample_channels])
+  crop_features = tf.reshape(
+      tf.gather(features_r2, indices),
+      [batch_size * num_of_instances,
+       mask_crop_size, mask_crop_size,
+       num_downsample_channels])
+
+  return crop_features
+
+
+def crop_mask_in_target_box(masks, boxes, target_boxes, output_size):
+  """Crop masks in target boxes.
+
+  Args:
+    masks: A tensor with a shape of [batch_size, num_masks, height, width].
+    boxes: a float tensor representing box cooridnates that tightly enclose
+      masks with a shape of [batch_size, num_masks, 4] in un-normalized
+      coordinates. A box is represented by [ymin, xmin, ymax, xmax].
+    target_boxes: a float tensor representing target box cooridnates for
+      masks with a shape of [batch_size, num_masks, 4] in un-normalized
+      coordinates. A box is represented by [ymin, xmin, ymax, xmax].
+    output_size: A scalar to indicate the output crop size. It currently only
+      supports to output a square shape outputs.
+
+  Returns:
+    A 4-D tensor representing feature crop of shape
+    [batch_size, num_boxes, output_size, output_size].
+  """
+  with tf.name_scope('crop_mask_in_target_box'):
+    batch_size, num_masks, height, width = masks.get_shape().as_list()
+    masks = tf.reshape(masks, [batch_size*num_masks, height, width, 1])
+    # Pad zeros on the boundary of masks.
+    masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4)
+    masks = tf.reshape(masks, [batch_size, num_masks, height+4, width+4, 1])
+
+    # Projects target box locations and sizes to corresponding cropped
+    # mask coordinates.
+    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
+        value=boxes, num_or_size_splits=4, axis=2)
+    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
+        value=target_boxes, num_or_size_splits=4, axis=2)
+    y_transform = (bb_y_min - gt_y_min) * height / (
+        gt_y_max - gt_y_min + _EPSILON) + 2
+    x_transform = (bb_x_min - gt_x_min) * height / (
+        gt_x_max - gt_x_min + _EPSILON) + 2
+    h_transform = (bb_y_max - bb_y_min) * width / (
+        gt_y_max - gt_y_min + _EPSILON)
+    w_transform = (bb_x_max - bb_x_min) * width / (
+        gt_x_max - gt_x_min + _EPSILON)
+
+    boundaries = tf.concat([
+        tf.cast(
+            tf.ones_like(y_transform) * ((height + 4) - 1), dtype=tf.float32),
+        tf.cast(
+            tf.ones_like(x_transform) * ((width + 4) - 1), dtype=tf.float32)
+    ],
+                           axis=-1)
+
+    # Reshape tensors to have the right shape for selective_crop_and_resize.
+    trasnformed_boxes = tf.concat(
+        [y_transform, x_transform, h_transform, w_transform], -1)
+    levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
+                     [batch_size, 1])
+
+    cropped_masks = selective_crop_and_resize(
+        masks,
+        trasnformed_boxes,
+        levels,
+        boundaries,
+        output_size,
+        sample_offset=0)
+    cropped_masks = tf.squeeze(cropped_masks, axis=-1)
+
+  return cropped_masks