Commit ff47e0d6 authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 344134923
parent 5a4a3ac3
@@ -22,6 +22,7 @@ import collections
import tensorflow as tf
from official.vision import keras_cv
from official.vision.detection.utils import box_utils
from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler
from official.vision.detection.utils.object_detection import box_list
@@ -290,3 +291,168 @@ class RpnAnchorLabeler(AnchorLabeler):
box_targets_dict = self._anchor.unpack_labels(box_targets)
return score_targets_dict, box_targets_dict
class OlnAnchorLabeler(RpnAnchorLabeler):
"""Labeler for Region Proposal Network."""
def __init__(self,
anchor,
match_threshold=0.7,
unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
has_centerness=False,
center_match_iou_threshold=0.3,
center_unmatched_iou_threshold=0.1,
num_center_samples_per_im=256):
"""Constructs rpn anchor labeler to assign labels and centerness to anchors.
Args:
anchor: an instance of class Anchors.
match_threshold: a float number between 0 and 1 representing the
lower-bound threshold to assign positive labels for anchors. An anchor
with a score over the threshold is labeled positive.
unmatched_threshold: a float number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
rpn_batch_size_per_im: number of anchors that are sampled per image.
rpn_fg_fraction: desired fraction of positive (foreground) anchors among
the `rpn_batch_size_per_im` sampled anchors.
has_centerness: whether to include centerness target creation. An anchor
is paired with one centerness score.
center_match_iou_threshold: a float number between 0 and 1 representing
the lower-bound threshold to sample foreground anchors for centerness
regression. An anchor with a score over the threshold is sampled as
foreground sample for centerness regression. We sample mostly from the
foreground region (255 out of 256 samples). That is, we sample 255 vs 1
(foreground vs background) anchor points to learn centerness regression.
center_unmatched_iou_threshold: a float number between 0 and 1
representing the upper-bound threshold to sample background anchors for
centerness regression. An anchor with a score below the threshold is
sampled as a background sample for centerness regression. We sample very
sparsely from the background region (1 out of 256 samples). That is, we
sample 255 vs 1 (foreground vs background) anchor points to learn
centerness regression.
num_center_samples_per_im: number of anchor points per image that are
sampled as centerness targets.
"""
super(OlnAnchorLabeler, self).__init__(
anchor, match_threshold=match_threshold,
unmatched_threshold=unmatched_threshold,
rpn_batch_size_per_im=rpn_batch_size_per_im,
rpn_fg_fraction=rpn_fg_fraction)
similarity_calc = keras_cv.ops.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(
match_threshold,
unmatched_threshold=unmatched_threshold,
negatives_lower_than_unmatched=True,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
if has_centerness:
center_matcher = argmax_matcher.ArgMaxMatcher(
center_match_iou_threshold,
unmatched_threshold=center_match_iou_threshold,
negatives_lower_than_unmatched=True,
force_match_for_each_row=True,)
else:
center_matcher = None
self._target_assigner = target_assigner.OlnTargetAssigner(
similarity_calc, matcher, box_coder,
center_matcher=center_matcher)
self._num_center_samples_per_im = num_center_samples_per_im
self._center_unmatched_iou_threshold = center_unmatched_iou_threshold
self._rpn_batch_size_per_im = rpn_batch_size_per_im
self._rpn_fg_fraction = rpn_fg_fraction
def label_anchors_lrtb(self, gt_boxes, gt_labels):
"""Labels anchors with ground truth inputs.
Args:
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors]. The height_l and width_l
represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
lrtb_targets_dict: Same structure as box_targets_dict, except the regression
targets are converted from xyhw to lrtb format. Ordered dictionary with
keys [min_level, min_level+1, ..., max_level]. The values are tensor
with shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
center_targets_dict: Same structure as score_targets_dict, except the
scores are centerness values ranging from 0 to 1. Ordered dictionary
with keys [min_level, min_level+1, ..., max_level]. The values are
tensor with shape [height_l, width_l, num_anchors]. The height_l and
width_l represent the dimension of class logits at l-th level.
"""
gt_box_list = box_list.BoxList(gt_boxes)
anchor_box_list = box_list.BoxList(self._anchor.boxes)
# cls_targets, cls_weights, box_weights are not used.
(_, _, box_targets, _, matches,
matched_gt_box_list, matched_anchors_mask,
center_matched_gt_box_list, center_matched_anchors_mask,
matched_ious) = self._target_assigner.assign(
anchor_box_list, gt_box_list, gt_labels)
# Box lrtb_targets.
lrtb_targets, _ = box_utils.encode_boxes_lrtb(
matched_gt_box_list.data['boxes'],
anchor_box_list.data['boxes'],
weights=[1.0, 1.0, 1.0, 1.0])
lrtb_sanity = tf.logical_and(
tf.greater(tf.reduce_min(lrtb_targets, -1), 0.),
matched_anchors_mask)
# To broadcast lrtb_sanity to the same shape as lrtb_targets.
lrtb_sanity = tf.tile(tf.expand_dims(lrtb_sanity, 1),
[1, tf.shape(lrtb_targets)[1]])
lrtb_targets = tf.where(lrtb_sanity,
lrtb_targets,
tf.zeros_like(lrtb_targets))
# RPN anchor-gtbox iou values.
iou_targets = tf.where(tf.greater(matched_ious, 0.0),
matched_ious,
tf.zeros_like(matched_ious))
# Centerness_targets.
_, center_targets = box_utils.encode_boxes_lrtb(
center_matched_gt_box_list.data['boxes'],
anchor_box_list.data['boxes'],
weights=[1.0, 1.0, 1.0, 1.0])
# Positive-negative centerness sampler.
num_center_samples_per_im = self._num_center_samples_per_im
center_pos_neg_sampler = (
balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
positive_fraction=(1. - 1. / num_center_samples_per_im),
is_static=False))
center_pos_neg_indicator = tf.logical_or(
center_matched_anchors_mask,
tf.less(iou_targets, self._center_unmatched_iou_threshold))
center_pos_labels = center_matched_anchors_mask
center_samples = center_pos_neg_sampler.subsample(
center_pos_neg_indicator, num_center_samples_per_im, center_pos_labels)
is_valid = center_samples
center_targets = tf.where(is_valid,
center_targets,
(-1) * tf.ones_like(center_targets))
# score_targets contains the subsampled positive and negative anchors.
score_targets, _, _ = self._get_rpn_samples(matches.match_results)
# Unpacks labels.
score_targets_dict = self._anchor.unpack_labels(score_targets)
box_targets_dict = self._anchor.unpack_labels(box_targets)
lrtb_targets_dict = self._anchor.unpack_labels(lrtb_targets)
center_targets_dict = self._anchor.unpack_labels(center_targets)
return (score_targets_dict, box_targets_dict,
lrtb_targets_dict, center_targets_dict)
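A minimal usage sketch of the labeler above (hedged: the `Anchor` constructor is called positionally as in the parser later in this commit; level counts, anchor size, and box values are illustrative only):

import tensorflow as tf
from official.vision.detection.dataloader import anchor

# Anchor(min_level, max_level, num_scales, aspect_ratios, anchor_size,
# image_size) -- positional call mirroring the parser below.
input_anchor = anchor.Anchor(2, 6, 1, [1.0], 4.0, (256, 256))
labeler = anchor.OlnAnchorLabeler(input_anchor, has_centerness=True)

gt_boxes = tf.constant([[10., 10., 120., 200.]])  # [y0, x0, y1, x1]
gt_labels = tf.constant([[1.]])
score_t, box_t, lrtb_t, center_t = labeler.label_anchors_lrtb(
    gt_boxes=gt_boxes, gt_labels=gt_labels)
# Each output is an ordered dict keyed by pyramid level, per the docstring.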
@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
from official.vision.detection.dataloader import maskrcnn_parser
from official.vision.detection.dataloader import olnmask_parser
from official.vision.detection.dataloader import retinanet_parser
from official.vision.detection.dataloader import shapemask_parser
@@ -69,6 +70,38 @@ def parser_generator(params, mode):
mask_crop_size=parser_params.mask_crop_size,
use_bfloat16=params.architecture.use_bfloat16,
mode=mode)
elif params.architecture.parser == 'olnmask_parser':
anchor_params = params.anchor
parser_params = params.olnmask_parser
parser_fn = olnmask_parser.Parser(
output_size=parser_params.output_size,
min_level=params.architecture.min_level,
max_level=params.architecture.max_level,
num_scales=anchor_params.num_scales,
aspect_ratios=anchor_params.aspect_ratios,
anchor_size=anchor_params.anchor_size,
rpn_match_threshold=parser_params.rpn_match_threshold,
rpn_unmatched_threshold=parser_params.rpn_unmatched_threshold,
rpn_batch_size_per_im=parser_params.rpn_batch_size_per_im,
rpn_fg_fraction=parser_params.rpn_fg_fraction,
aug_rand_hflip=parser_params.aug_rand_hflip,
aug_scale_min=parser_params.aug_scale_min,
aug_scale_max=parser_params.aug_scale_max,
skip_crowd_during_training=parser_params.skip_crowd_during_training,
max_num_instances=parser_params.max_num_instances,
include_mask=params.architecture.include_mask,
mask_crop_size=parser_params.mask_crop_size,
use_bfloat16=params.architecture.use_bfloat16,
mode=mode,
has_centerness=parser_params.has_centerness,
rpn_center_match_iou_threshold=(
parser_params.rpn_center_match_iou_threshold),
rpn_center_unmatched_iou_threshold=(
parser_params.rpn_center_unmatched_iou_threshold),
rpn_num_center_samples_per_im=(
parser_params.rpn_num_center_samples_per_im),
class_agnostic=parser_params.class_agnostic,
train_class=parser_params.train_class,)
elif params.architecture.parser == 'shapemask_parser':
anchor_params = params.anchor
parser_params = params.shapemask_parser
...
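The new branch is selected purely by configuration; a hedged sketch of the fields it reads (the keys mirror the attribute accesses in parser_generator above, not a verified config schema):

# Hypothetical override fragment; keys mirror the attributes read above.
olnmask_overrides = {
    'architecture': {'parser': 'olnmask_parser'},
    'olnmask_parser': {
        'has_centerness': True,
        'rpn_center_match_iou_threshold': 0.3,
        'rpn_center_unmatched_iou_threshold': 0.1,
        'rpn_num_center_samples_per_im': 256,
        'class_agnostic': True,
        'train_class': 'voc',
    },
}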
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data parser and processing for Mask R-CNN."""
import tensorflow as tf
from official.vision.detection.dataloader import anchor
from official.vision.detection.dataloader.maskrcnn_parser import Parser as MaskrcnnParser
from official.vision.detection.utils import box_utils
from official.vision.detection.utils import class_utils
from official.vision.detection.utils import input_utils
class Parser(MaskrcnnParser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_size,
min_level,
max_level,
num_scales,
aspect_ratios,
anchor_size,
rpn_match_threshold=0.7,
rpn_unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
aug_rand_hflip=False,
aug_scale_min=1.0,
aug_scale_max=1.0,
skip_crowd_during_training=True,
max_num_instances=100,
include_mask=False,
mask_crop_size=112,
use_bfloat16=True,
mode=None,
# for centerness learning.
has_centerness=False,
rpn_center_match_iou_threshold=0.3,
rpn_center_unmatched_iou_threshold=0.1,
rpn_num_center_samples_per_im=256,
# for class manipulation.
class_agnostic=False,
train_class='all',
):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of the output image. The
output_size should be divisible by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scale [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio
anchors added on each level. The number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
on each scale level.
anchor_size: `float` number representing the scale of size of the base
anchor to the feature stride 2^level.
rpn_match_threshold: `float` between 0 and 1, the lower-bound IoU
threshold to assign positive labels to anchors.
rpn_unmatched_threshold: `float` between 0 and 1, the upper-bound IoU
threshold to assign negative labels to anchors.
rpn_batch_size_per_im: `int` number of anchors sampled per image.
rpn_fg_fraction: `float` desired fraction of positive anchors in a batch.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
skip_crowd_during_training: `bool`, if True, skip annotations whose
`is_crowd` is set to 1.
max_num_instances: `int` number of maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`.
include_mask: a bool to indicate whether parse mask groundtruth.
mask_crop_size: the size which groundtruth mask is cropped to.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
or prediction with groundtruths in the outputs.
has_centerness: whether to create centerness targets.
rpn_center_match_iou_threshold: IoU threshold for valid centerness
samples, set to 0.3 by default.
rpn_center_unmatched_iou_threshold: IoU threshold for invalid centerness
samples, set to 0.1 by default.
rpn_num_center_samples_per_im: number of centerness samples per image,
256 by default.
class_agnostic: whether to merge class ids into one foreground (=1)
class, False by default.
train_class: 'all' or 'voc' or 'nonvoc', 'all' by default.
"""
super(Parser, self).__init__(
output_size=output_size,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size,
rpn_match_threshold=rpn_match_threshold,
rpn_unmatched_threshold=rpn_unmatched_threshold,
rpn_batch_size_per_im=rpn_batch_size_per_im,
rpn_fg_fraction=rpn_fg_fraction,
aug_rand_hflip=aug_rand_hflip,
aug_scale_min=aug_scale_min,
aug_scale_max=aug_scale_max,
skip_crowd_during_training=skip_crowd_during_training,
max_num_instances=max_num_instances,
include_mask=include_mask,
mask_crop_size=mask_crop_size,
use_bfloat16=use_bfloat16,
mode=mode,)
# Centerness target assigning.
self._has_centerness = has_centerness
self._rpn_center_match_iou_threshold = rpn_center_match_iou_threshold
self._rpn_center_unmatched_iou_threshold = (
rpn_center_unmatched_iou_threshold)
self._rpn_num_center_samples_per_im = rpn_num_center_samples_per_im
# Class manipulation.
self._class_agnostic = class_agnostic
self._train_class = train_class
def _parse_train_data(self, data):
"""Parses data for training.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
image: image tensor that is preprocessed to have normalized values and
shape [output_size[0], output_size[1], 3].
labels: a dictionary of tensors used for training. The following describes
{key: value} pairs in the dictionary.
image_info: a 2D `Tensor` that encodes the information of the image and
the applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width],
[y_scale, x_scale], [y_offset, x_offset]].
anchor_boxes: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, 4] representing anchor boxes at each level.
rpn_score_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level.
rpn_box_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
gt_boxes: Groundtruth bounding box annotations. The box is represented
in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
image that is fed to the network. The tensor is padded with -1 to
the fixed dimension [self._max_num_instances, 4].
gt_classes: Groundtruth classes annotations. The tensor is padded
with -1 to the fixed dimension [self._max_num_instances].
gt_masks: Groundtruth masks cropped by the bounding box and
resized to a fixed size determined by mask_crop_size.
"""
classes = data['groundtruth_classes']
boxes = data['groundtruth_boxes']
if self._include_mask:
masks = data['groundtruth_instance_masks']
is_crowds = data['groundtruth_is_crowd']
# Skips annotations with `is_crowd` = True.
if self._skip_crowd_during_training and self._is_training:
num_groundtruths = tf.shape(classes)[0]
with tf.control_dependencies([num_groundtruths, is_crowds]):
indices = tf.cond(
tf.greater(tf.size(is_crowds), 0),
lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
classes = tf.gather(classes, indices)
boxes = tf.gather(boxes, indices)
if self._include_mask:
masks = tf.gather(masks, indices)
# Gets original image and its size.
image = data['image']
image_shape = tf.shape(image)[0:2]
# Normalizes image with mean and std pixel values.
image = input_utils.normalize_image(image)
# Flips image randomly during training.
if self._aug_rand_hflip:
if self._include_mask:
image, boxes, masks = input_utils.random_horizontal_flip(
image, boxes, masks)
else:
image, boxes = input_utils.random_horizontal_flip(
image, boxes)
# Converts boxes from normalized coordinates to pixel coordinates.
# Now the coordinates of boxes are w.r.t. the original image.
boxes = box_utils.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = input_utils.resize_and_crop_image(
image,
self._output_size,
padded_size=input_utils.compute_padded_size(
self._output_size, 2 ** self._max_level),
aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max)
image_height, image_width, _ = image.get_shape().as_list()
# Resizes and crops boxes.
# Now the coordinates of boxes are w.r.t the scaled image.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes(
boxes, image_scale, image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
if self._include_mask:
masks = tf.gather(masks, indices)
# Transfer boxes to the original image space and do normalization.
cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
cropped_boxes = box_utils.normalize_boxes(cropped_boxes, image_shape)
num_masks = tf.shape(masks)[0]
masks = tf.image.crop_and_resize(
tf.expand_dims(masks, axis=-1),
cropped_boxes,
box_indices=tf.range(num_masks, dtype=tf.int32),
crop_size=[self._mask_crop_size, self._mask_crop_size],
method='bilinear')
masks = tf.squeeze(masks, axis=-1)
# Class manipulation.
# Filter out novel split classes from training.
if self._train_class != 'all':
valid_classes = tf.cast(
class_utils.coco_split_class_ids(self._train_class),
dtype=classes.dtype)
match = tf.reduce_any(tf.equal(
tf.expand_dims(valid_classes, 1),
tf.expand_dims(classes, 0)), 0)
# Remove novel-split classes and their boxes.
boxes = tf.gather(boxes, tf.where(match)[:, 0])
classes = tf.gather(classes, tf.where(match)[:, 0])
if self._include_mask:
masks = tf.gather(masks, tf.where(match)[:, 0])
# Assigns anchor targets.
# Note that after the target assignment, box targets are absolute pixel
# offsets w.r.t. the scaled image.
input_anchor = anchor.Anchor(
self._min_level,
self._max_level,
self._num_scales,
self._aspect_ratios,
self._anchor_size,
(image_height, image_width))
anchor_labeler = anchor.OlnAnchorLabeler(
input_anchor,
self._rpn_match_threshold,
self._rpn_unmatched_threshold,
self._rpn_batch_size_per_im,
self._rpn_fg_fraction,
# for centerness target.
self._has_centerness,
self._rpn_center_match_iou_threshold,
self._rpn_center_unmatched_iou_threshold,
self._rpn_num_center_samples_per_im,)
if self._has_centerness:
rpn_score_targets, _, rpn_lrtb_targets, rpn_center_targets = (
anchor_labeler.label_anchors_lrtb(
gt_boxes=boxes,
gt_labels=tf.cast(
tf.expand_dims(classes, axis=-1), dtype=tf.float32)))
else:
rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))
# For the base RPN, use a dummy placeholder for the centerness target.
rpn_center_targets = rpn_score_targets.copy()
# If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16:
image = tf.cast(image, dtype=tf.bfloat16)
inputs = {
'image': image,
'image_info': image_info,
}
# Packs labels for model_fn outputs.
labels = {
'anchor_boxes': input_anchor.multilevel_boxes,
'image_info': image_info,
'rpn_score_targets': rpn_score_targets,
'rpn_box_targets': (rpn_lrtb_targets if self._has_centerness
else rpn_box_targets),
'rpn_center_targets': rpn_center_targets,
}
# If class_agnostic, convert to binary classes.
if self._class_agnostic:
classes = tf.where(tf.greater(classes, 0),
tf.ones_like(classes),
tf.zeros_like(classes))
inputs['gt_boxes'] = input_utils.pad_to_fixed_size(boxes,
self._max_num_instances,
-1)
inputs['gt_classes'] = input_utils.pad_to_fixed_size(
classes, self._max_num_instances, -1)
if self._include_mask:
inputs['gt_masks'] = input_utils.pad_to_fixed_size(
masks, self._max_num_instances, -1)
return inputs, labels
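The train-class filter in _parse_train_data reduces to a tensor membership test; a standalone illustration (pure TF; in the real code the valid ids come from class_utils.coco_split_class_ids, the ids below are made up):

import tensorflow as tf

classes = tf.constant([1, 3, 17, 62], dtype=tf.int64)
boxes = tf.random.uniform([4, 4])
valid_classes = tf.constant([1, 62], dtype=tf.int64)  # hypothetical split ids

# For each class, test membership in valid_classes via broadcasting.
match = tf.reduce_any(
    tf.equal(tf.expand_dims(valid_classes, 1), tf.expand_dims(classes, 0)), 0)
keep = tf.where(match)[:, 0]
boxes = tf.gather(boxes, keep)
classes = tf.gather(classes, keep)
print(classes.numpy())  # [ 1 62]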
@@ -366,6 +366,156 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
return decoded_boxes
def encode_boxes_lrtb(boxes, anchors, weights=None):
"""Encode boxes to targets on lrtb (=left,right,top,bottom) format.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
Returns:
encoded_boxes_lrtb: a tensor whose shape is the same as `boxes` representing
the encoded box targets. The box targets encode the left, right, top,
bottom distances from an anchor location to the four borders of the
matched groundtruth bounding box.
center_targets: centerness targets defined by the left, right, top, and
bottom distance targets. The centerness is defined as the deviation of the
anchor location from the groundtruth object center. Formally, centerness =
sqrt(min(left, right)/max(left, right)*min(top, bottom)/max(top, bottom)).
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError(
'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
with tf.name_scope('encode_boxes_lrtb'):
boxes = tf.cast(boxes, dtype=anchors.dtype)
ymin = boxes[..., 0:1]
xmin = boxes[..., 1:2]
ymax = boxes[..., 2:3]
xmax = boxes[..., 3:4]
# box_h = ymax - ymin + 1.0
# box_w = xmax - xmin + 1.0
box_h = ymax - ymin
box_w = xmax - xmin
anchor_ymin = anchors[..., 0:1]
anchor_xmin = anchors[..., 1:2]
anchor_ymax = anchors[..., 2:3]
anchor_xmax = anchors[..., 3:4]
# anchor_h = anchor_ymax - anchor_ymin + 1.0
# anchor_w = anchor_xmax - anchor_xmin + 1.0
anchor_h = anchor_ymax - anchor_ymin
anchor_w = anchor_xmax - anchor_xmin
anchor_yc = anchor_ymin + 0.5 * anchor_h
anchor_xc = anchor_xmin + 0.5 * anchor_w
box_h += EPSILON
box_w += EPSILON
anchor_h += EPSILON
anchor_w += EPSILON
left = (anchor_xc - xmin) / anchor_w
right = (xmax - anchor_xc) / anchor_w
top = (anchor_yc - ymin) / anchor_h
bottom = (ymax - anchor_yc) / anchor_h
# Create centerness target.
lrtb_targets = tf.concat([left, right, top, bottom], axis=-1)
valid_match = tf.greater(tf.reduce_min(lrtb_targets, -1), 0.0)
# Centerness score.
left_right = tf.concat([left, right], axis=-1)
left_right = tf.where(tf.stack([valid_match, valid_match], -1),
left_right, tf.zeros_like(left_right))
top_bottom = tf.concat([top, bottom], axis=-1)
top_bottom = tf.where(tf.stack([valid_match, valid_match], -1),
top_bottom, tf.zeros_like(top_bottom))
center_targets = tf.sqrt(
(tf.reduce_min(left_right, -1) /
(tf.reduce_max(left_right, -1) + EPSILON)) *
(tf.reduce_min(top_bottom, -1) /
(tf.reduce_max(top_bottom, -1) + EPSILON)))
center_targets = tf.where(valid_match,
center_targets,
tf.zeros_like(center_targets))
if weights:
left *= weights[0]
right *= weights[1]
top *= weights[2]
bottom *= weights[3]
encoded_boxes_lrtb = tf.concat(
[left, right, top, bottom],
axis=-1)
return encoded_boxes_lrtb, center_targets
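A worked numeric check of the centerness formula above (the anchor-width/height normalization cancels inside the min/max ratios, so raw pixel distances suffice; the numbers are illustrative):

import math

# Anchor center at (y, x) = (50, 50); groundtruth box [0, 0, 100, 200].
left, right = 50.0, 150.0   # distances from the anchor center to x-borders
top, bottom = 50.0, 50.0    # distances from the anchor center to y-borders
centerness = math.sqrt(
    (min(left, right) / max(left, right)) *
    (min(top, bottom) / max(top, bottom)))
print(centerness)  # sqrt((50/150) * (50/50)) = 0.577...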
def decode_boxes_lrtb(encoded_boxes_lrtb, anchors, weights=None):
"""Decode boxes.
Args:
encoded_boxes_lrtb: a tensor whose last dimension is 4 representing the
coordinates of encoded boxes in left, right, top, bottom order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
Returns:
decoded_boxes_lrtb: a tensor whose shape is the same as
`encoded_boxes_lrtb`, containing the decoded boxes in ymin, xmin, ymax,
xmax order. The boxes are recovered from the encoded left, right, top,
and bottom distances between each anchor center and the four borders of
its matched groundtruth bounding box.
"""
if encoded_boxes_lrtb.shape[-1] != 4:
raise ValueError(
'encoded_boxes_lrtb.shape[-1] is {:d}, but must be 4.'
.format(encoded_boxes_lrtb.shape[-1]))
with tf.name_scope('decode_boxes_lrtb'):
encoded_boxes_lrtb = tf.cast(encoded_boxes_lrtb, dtype=anchors.dtype)
left = encoded_boxes_lrtb[..., 0:1]
right = encoded_boxes_lrtb[..., 1:2]
top = encoded_boxes_lrtb[..., 2:3]
bottom = encoded_boxes_lrtb[..., 3:4]
if weights:
left /= weights[0]
right /= weights[1]
top /= weights[2]
bottom /= weights[3]
anchor_ymin = anchors[..., 0:1]
anchor_xmin = anchors[..., 1:2]
anchor_ymax = anchors[..., 2:3]
anchor_xmax = anchors[..., 3:4]
anchor_h = anchor_ymax - anchor_ymin
anchor_w = anchor_xmax - anchor_xmin
anchor_yc = anchor_ymin + 0.5 * anchor_h
anchor_xc = anchor_xmin + 0.5 * anchor_w
anchor_h += EPSILON
anchor_w += EPSILON
decoded_boxes_ymin = anchor_yc - top * anchor_h
decoded_boxes_xmin = anchor_xc - left * anchor_w
decoded_boxes_ymax = anchor_yc + bottom * anchor_h
decoded_boxes_xmax = anchor_xc + right * anchor_w
decoded_boxes_lrtb = tf.concat(
[decoded_boxes_ymin, decoded_boxes_xmin,
decoded_boxes_ymax, decoded_boxes_xmax],
axis=-1)
return decoded_boxes_lrtb
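decode_boxes_lrtb inverts encode_boxes_lrtb whenever the anchor center lies inside the groundtruth box; a round-trip sketch (assuming this module is importable as official.vision.detection.utils.box_utils):

import tensorflow as tf
from official.vision.detection.utils import box_utils

boxes = tf.constant([[10., 20., 110., 220.]])   # [ymin, xmin, ymax, xmax]
anchors = tf.constant([[40., 60., 80., 120.]])  # center (60, 90) lies inside

lrtb, centerness = box_utils.encode_boxes_lrtb(
    boxes, anchors, weights=[1.0, 1.0, 1.0, 1.0])
decoded = box_utils.decode_boxes_lrtb(
    lrtb, anchors, weights=[1.0, 1.0, 1.0, 1.0])
# decoded matches boxes up to EPSILON, and 0 < centerness <= 1 here.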
def filter_boxes(boxes, scores, image_shape, min_size_threshold):
"""Filter and remove boxes that are too small or fall outside the image.
...
@@ -315,3 +315,209 @@ class TargetAssigner(object):
BoxCoder object.
"""
return self._box_coder
class OlnTargetAssigner(TargetAssigner):
"""Target assigner to compute classification and regression targets."""
def __init__(self,
similarity_calc,
matcher,
box_coder,
negative_class_weight=1.0,
unmatched_cls_target=None,
center_matcher=None):
"""Construct Object Detection Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: Matcher used to match groundtruth to anchors.
box_coder: BoxCoder used to encode matching groundtruth boxes with respect
to anchors.
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0). The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each anchor (and
can be empty for scalar targets). This shape must thus be compatible
with the groundtruth labels that are passed to the "assign" function
(which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If set to None,
unmatched_cls_target is set to be [0] for each anchor.
center_matcher: Matcher used to match groundtruth to anchors to sample and
assign the regression targets of centerness to each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
super(OlnTargetAssigner, self).__init__(
similarity_calc=similarity_calc,
matcher=matcher,
box_coder=box_coder,
negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target)
# centerness-matcher with independent sampling IoU threshold.
self._center_matcher = center_matcher
def assign(self,
anchors,
groundtruth_boxes,
groundtruth_labels=None,
groundtruth_weights=None,
**params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
to groundtruth_boxes and assign classification and regression targets to
each anchor as well as weights based on the resulting match (specifying,
e.g., which anchors should not contribute to training loss).
Anchors that are not matched to anything are given a classification target
of self._unmatched_cls_target which can be specified via the constructor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for
each of the ground_truth boxes. The subshape [d_1, ... d_k] can be empty
(corresponding to scalar inputs). When set to None, groundtruth_labels
assumes a binary problem where all ground_truth boxes get a positive
label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of the
Matcher.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
groundtruth boxes, with rows corresponding to groundtruth boxes
and columns corresponding to anchors.
matched_gt_boxlist: a BoxList object with data of float32 tensor with
shape [num_anchors, box_dimension] which encodes the coordinates of the
matched groundtruth boxes.
matched_anchors_mask: a Bool tensor with shape [num_anchors] which
indicates whether an anchor is matched or not.
center_matched_gt_boxlist: a BoxList object with data of float32 tensor
with shape [num_anchors, box_dimension] which encodes the coordinates of
the groundtruth boxes matched for centerness target assignment.
center_matched_anchors_mask: a Boolean tensor with shape [num_anchors]
which indicates whether an anchor is matched or not for centerness
target assignment.
matched_ious: a float32 tensor with shape [num_anchors] which encodes the
ious between each anchor and the matched groundtruth boxes.
Raises:
ValueError: if anchors or groundtruth_boxes are not of type
box_list.BoxList
"""
if not isinstance(anchors, box_list.BoxList):
raise ValueError('anchors must be a BoxList')
if not isinstance(groundtruth_boxes, box_list.BoxList):
raise ValueError('groundtruth_boxes must be a BoxList')
if groundtruth_labels is None:
groundtruth_labels = tf.ones(
tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
unmatched_shape_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
shape_utils.combined_static_and_dynamic_shape(
self._unmatched_cls_target))
labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[:1],
shape_utils.combined_static_and_dynamic_shape(
groundtruth_boxes.get())[:1])
if groundtruth_weights is None:
num_gt_boxes = groundtruth_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = groundtruth_boxes.num_boxes()
groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
with tf.control_dependencies(
[unmatched_shape_assert, labels_and_box_shapes_assert]):
match_quality_matrix = self._similarity_calc(
groundtruth_boxes.get(), anchors.get())
match = self._matcher.match(match_quality_matrix, **params)
reg_targets, matched_gt_boxlist, matched_anchors_mask = (
self._create_regression_targets(anchors,
groundtruth_boxes,
match))
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match, groundtruth_weights)
cls_weights = self._create_classification_weights(match,
groundtruth_weights)
# Match for creation of centerness regression targets.
if self._center_matcher is not None:
center_match = self._center_matcher.match(
match_quality_matrix, **params)
center_matched_gt_boxes = center_match.gather_based_on_match(
groundtruth_boxes.get(),
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
center_matched_gt_boxlist = box_list.BoxList(center_matched_gt_boxes)
center_matched_anchors_mask = center_match.matched_column_indicator()
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
if self._center_matcher is not None:
matched_ious = tf.reduce_max(match_quality_matrix, 0)
return (cls_targets, cls_weights, reg_targets, reg_weights, match,
matched_gt_boxlist, matched_anchors_mask,
center_matched_gt_boxlist, center_matched_anchors_mask,
matched_ious)
else:
return (cls_targets, cls_weights, reg_targets, reg_weights, match)
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
"""Returns a regression target for each anchor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth_boxes
match: a matcher.Match object
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_gt_boxes = match.gather_based_on_match(
groundtruth_boxes.get(),
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
matched_keypoints = match.gather_based_on_match(
groundtruth_keypoints,
unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
match_results_shape = shape_utils.combined_static_and_dynamic_shape(
match.match_results)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets = tf.tile(self._default_regression_target(),
[match_results_shape[0], 1])
matched_anchors_mask = match.matched_column_indicator()
# To broadcast matched_anchors_mask to the same shape as
# matched_reg_targets.
matched_anchors_mask_tiled = tf.tile(
tf.expand_dims(matched_anchors_mask, 1),
[1, tf.shape(matched_reg_targets)[1]])
reg_targets = tf.where(matched_anchors_mask_tiled,
matched_reg_targets,
unmatched_ignored_reg_targets)
return reg_targets, matched_gt_boxlist, matched_anchors_mask
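A hedged construction sketch mirroring how OlnAnchorLabeler.__init__ earlier in this commit wires the assigner together (imports assume the official.vision.detection package layout; thresholds and boxes are illustrative):

import tensorflow as tf
from official.vision import keras_cv
from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import box_list
from official.vision.detection.utils.object_detection import faster_rcnn_box_coder
from official.vision.detection.utils.object_detection import target_assigner

matcher = argmax_matcher.ArgMaxMatcher(
    0.7, unmatched_threshold=0.3,
    negatives_lower_than_unmatched=True, force_match_for_each_row=True)
center_matcher = argmax_matcher.ArgMaxMatcher(
    0.3, unmatched_threshold=0.3,
    negatives_lower_than_unmatched=True, force_match_for_each_row=True)
assigner = target_assigner.OlnTargetAssigner(
    keras_cv.ops.IouSimilarity(), matcher,
    faster_rcnn_box_coder.FasterRcnnBoxCoder(),
    center_matcher=center_matcher)

anchors = box_list.BoxList(tf.constant([[0., 0., 64., 64.]]))
gt_boxes = box_list.BoxList(tf.constant([[8., 8., 56., 56.]]))
# With center_matcher set, assign() returns the 10-tuple documented above.
outputs = assigner.assign(anchors, gt_boxes)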