Commit ff47e0d6 authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 344134923
parent 5a4a3ac3
@@ -22,6 +22,7 @@ import collections
import tensorflow as tf
from official.vision import keras_cv
from official.vision.detection.utils import box_utils
from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler
from official.vision.detection.utils.object_detection import box_list
@@ -290,3 +291,168 @@ class RpnAnchorLabeler(AnchorLabeler):
box_targets_dict = self._anchor.unpack_labels(box_targets)
return score_targets_dict, box_targets_dict
class OlnAnchorLabeler(RpnAnchorLabeler):
"""Labeler for Region Proposal Network."""
def __init__(self,
anchor,
match_threshold=0.7,
unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
has_centerness=False,
center_match_iou_threshold=0.3,
center_unmatched_iou_threshold=0.1,
num_center_samples_per_im=256):
"""Constructs rpn anchor labeler to assign labels and centerness to anchors.
Args:
anchor: an instance of class Anchors.
match_threshold: a float number between 0 and 1 representing the
lower-bound threshold to assign positive labels for anchors. An anchor
with a score over the threshold is labeled positive.
unmatched_threshold: a float number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
rpn_batch_size_per_im: number of anchors that are sampled per image.
rpn_fg_fraction: desired fraction of positive (foreground) anchors among
the `rpn_batch_size_per_im` sampled anchors.
has_centerness: whether to include centerness target creation. An anchor
is paired with one centerness score.
center_match_iou_threshold: a float number between 0 and 1 representing
the lower-bound threshold to sample foreground anchors for centerness
regression. An anchor with a score over the threshold is sampled as
foreground sample for centerness regression. We sample mostly from the
foreground region (255 out of 256 samples). That is, we sample 255 vs 1
(foreground vs background) anchor points to learn centerness regression.
center_unmatched_iou_threshold: a float number between 0 and 1
representing the upper-bound threshold to sample background anchors for
centerness regression. An anchor with a score below the threshold is
sampled as a background sample for centerness regression. We sample very
sparsely from the background region (1 out of 256 samples). That is, we
sample 255 vs 1 (foreground vs background) anchor points to learn
centerness regression.
num_center_samples_per_im: number of anchor points per image that are
sampled as centerness targets.
"""
super(OlnAnchorLabeler, self).__init__(
anchor, match_threshold=match_threshold,
unmatched_threshold=unmatched_threshold,
rpn_batch_size_per_im=rpn_batch_size_per_im,
rpn_fg_fraction=rpn_fg_fraction)
similarity_calc = keras_cv.ops.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(
match_threshold,
unmatched_threshold=unmatched_threshold,
negatives_lower_than_unmatched=True,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
if has_centerness:
center_matcher = argmax_matcher.ArgMaxMatcher(
center_match_iou_threshold,
unmatched_threshold=center_match_iou_threshold,
negatives_lower_than_unmatched=True,
force_match_for_each_row=True,)
else:
center_matcher = None
self._target_assigner = target_assigner.OlnTargetAssigner(
similarity_calc, matcher, box_coder,
center_matcher=center_matcher)
self._num_center_samples_per_im = num_center_samples_per_im
self._center_unmatched_iou_threshold = center_unmatched_iou_threshold
self._rpn_batch_size_per_im = rpn_batch_size_per_im
self._rpn_fg_fraction = rpn_fg_fraction
def label_anchors_lrtb(self, gt_boxes, gt_labels):
"""Labels anchors with ground truth inputs.
Args:
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors]. The height_l and width_l
represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
lrtb_targets_dict: Same structure as box_targets_dict, except the regression
targets are converted from xyhw to lrtb format. Ordered dictionary with
keys [min_level, min_level+1, ..., max_level]. The values are tensor
with shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
center_targets_dict: Same structure as score_targets_dict, except the
scores are centerness values ranging from 0 to 1. Ordered dictionary
with keys [min_level, min_level+1, ..., max_level]. The values are
tensor with shape [height_l, width_l, num_anchors]. The height_l and
width_l represent the dimension of class logits at l-th level.
"""
gt_box_list = box_list.BoxList(gt_boxes)
anchor_box_list = box_list.BoxList(self._anchor.boxes)
# cls_targets, cls_weights, box_weights are not used.
(_, _, box_targets, _, matches,
matched_gt_box_list, matched_anchors_mask,
center_matched_gt_box_list, center_matched_anchors_mask,
matched_ious) = self._target_assigner.assign(
anchor_box_list, gt_box_list, gt_labels)
# Box lrtb_targets.
lrtb_targets, _ = box_utils.encode_boxes_lrtb(
matched_gt_box_list.data['boxes'],
anchor_box_list.data['boxes'],
weights=[1.0, 1.0, 1.0, 1.0])
lrtb_sanity = tf.logical_and(
tf.greater(tf.reduce_min(lrtb_targets, -1), 0.),
matched_anchors_mask)
# To broadcast lrtb_sanity to the same shape as lrtb_targets.
lrtb_sanity = tf.tile(tf.expand_dims(lrtb_sanity, 1),
[1, tf.shape(lrtb_targets)[1]])
lrtb_targets = tf.where(lrtb_sanity,
lrtb_targets,
tf.zeros_like(lrtb_targets))
# RPN anchor-gtbox iou values.
iou_targets = tf.where(tf.greater(matched_ious, 0.0),
matched_ious,
tf.zeros_like(matched_ious))
# Centerness_targets.
_, center_targets = box_utils.encode_boxes_lrtb(
center_matched_gt_box_list.data['boxes'],
anchor_box_list.data['boxes'],
weights=[1.0, 1.0, 1.0, 1.0])
# Positive-negative centerness sampler.
num_center_samples_per_im = self._num_center_samples_per_im
center_pos_neg_sampler = (
balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
positive_fraction=(1. - 1. / num_center_samples_per_im),
is_static=False))
center_pos_neg_indicator = tf.logical_or(
center_matched_anchors_mask,
tf.less(iou_targets, self._center_unmatched_iou_threshold))
center_pos_labels = center_matched_anchors_mask
center_samples = center_pos_neg_sampler.subsample(
center_pos_neg_indicator, num_center_samples_per_im, center_pos_labels)
is_valid = center_samples
center_targets = tf.where(is_valid,
center_targets,
(-1) * tf.ones_like(center_targets))
# score_targets contains the subsampled positive and negative anchors.
score_targets, _, _ = self._get_rpn_samples(matches.match_results)
# Unpacks labels.
score_targets_dict = self._anchor.unpack_labels(score_targets)
box_targets_dict = self._anchor.unpack_labels(box_targets)
lrtb_targets_dict = self._anchor.unpack_labels(lrtb_targets)
center_targets_dict = self._anchor.unpack_labels(center_targets)
return (score_targets_dict, box_targets_dict,
lrtb_targets_dict, center_targets_dict)
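A minimal usage sketch of the labeler above (hedged: the `Anchor` constructor is called positionally as in the parser later in this commit; level counts, anchor size, and box values are illustrative only):

import tensorflow as tf
from official.vision.detection.dataloader import anchor

# Anchor(min_level, max_level, num_scales, aspect_ratios, anchor_size,
# image_size) -- positional call mirroring the parser below.
input_anchor = anchor.Anchor(2, 6, 1, [1.0], 4.0, (256, 256))
labeler = anchor.OlnAnchorLabeler(input_anchor, has_centerness=True)

gt_boxes = tf.constant([[10., 10., 120., 200.]])  # [y0, x0, y1, x1]
gt_labels = tf.constant([[1.]])
score_t, box_t, lrtb_t, center_t = labeler.label_anchors_lrtb(
    gt_boxes=gt_boxes, gt_labels=gt_labels)
# Each output is an ordered dict keyed by pyramid level, per the docstring.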
@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
from official.vision.detection.dataloader import maskrcnn_parser
from official.vision.detection.dataloader import olnmask_parser
from official.vision.detection.dataloader import retinanet_parser
from official.vision.detection.dataloader import shapemask_parser
@@ -69,6 +70,38 @@ def parser_generator(params, mode):
mask_crop_size=parser_params.mask_crop_size,
use_bfloat16=params.architecture.use_bfloat16,
mode=mode)
elif params.architecture.parser == 'olnmask_parser':
anchor_params = params.anchor
parser_params = params.olnmask_parser
parser_fn = olnmask_parser.Parser(
output_size=parser_params.output_size,
min_level=params.architecture.min_level,
max_level=params.architecture.max_level,
num_scales=anchor_params.num_scales,
aspect_ratios=anchor_params.aspect_ratios,
anchor_size=anchor_params.anchor_size,
rpn_match_threshold=parser_params.rpn_match_threshold,
rpn_unmatched_threshold=parser_params.rpn_unmatched_threshold,
rpn_batch_size_per_im=parser_params.rpn_batch_size_per_im,
rpn_fg_fraction=parser_params.rpn_fg_fraction,
aug_rand_hflip=parser_params.aug_rand_hflip,
aug_scale_min=parser_params.aug_scale_min,
aug_scale_max=parser_params.aug_scale_max,
skip_crowd_during_training=parser_params.skip_crowd_during_training,
max_num_instances=parser_params.max_num_instances,
include_mask=params.architecture.include_mask,
mask_crop_size=parser_params.mask_crop_size,
use_bfloat16=params.architecture.use_bfloat16,
mode=mode,
has_centerness=parser_params.has_centerness,
rpn_center_match_iou_threshold=(
parser_params.rpn_center_match_iou_threshold),
rpn_center_unmatched_iou_threshold=(
parser_params.rpn_center_unmatched_iou_threshold),
rpn_num_center_samples_per_im=(
parser_params.rpn_num_center_samples_per_im),
class_agnostic=parser_params.class_agnostic,
train_class=parser_params.train_class,)
elif params.architecture.parser == 'shapemask_parser':
anchor_params = params.anchor
parser_params = params.shapemask_parser
...
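The new branch is selected purely by configuration; a hedged sketch of the fields it reads (the keys mirror the attribute accesses in parser_generator above, not a verified config schema):

# Hypothetical override fragment; keys mirror the attributes read above.
olnmask_overrides = {
    'architecture': {'parser': 'olnmask_parser'},
    'olnmask_parser': {
        'has_centerness': True,
        'rpn_center_match_iou_threshold': 0.3,
        'rpn_center_unmatched_iou_threshold': 0.1,
        'rpn_num_center_samples_per_im': 256,
        'class_agnostic': True,
        'train_class': 'voc',
    },
}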
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data parser and processing for Mask R-CNN."""
import tensorflow as tf
from official.vision.detection.dataloader import anchor
from official.vision.detection.dataloader.maskrcnn_parser import Parser as MaskrcnnParser
from official.vision.detection.utils import box_utils
from official.vision.detection.utils import class_utils
from official.vision.detection.utils import input_utils
class Parser(MaskrcnnParser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_size,
min_level,
max_level,
num_scales,
aspect_ratios,
anchor_size,
rpn_match_threshold=0.7,
rpn_unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
aug_rand_hflip=False,
aug_scale_min=1.0,
aug_scale_max=1.0,
skip_crowd_during_training=True,
max_num_instances=100,
include_mask=False,
mask_crop_size=112,
use_bfloat16=True,
mode=None,
# for centerness learning.
has_centerness=False,
rpn_center_match_iou_threshold=0.3,
rpn_center_unmatched_iou_threshold=0.1,
rpn_num_center_samples_per_im=256,
# for class manipulation.
class_agnostic=False,
train_class='all',
):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of the output image. The
output_size should be divisible by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scale [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio
anchors added on each level. The number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
on each scale level.
anchor_size: `float` number representing the scale of size of the base
anchor to the feature stride 2^level.
rpn_match_threshold: `float` between 0 and 1, the lower-bound IoU
threshold to assign positive labels to anchors.
rpn_unmatched_threshold: `float` between 0 and 1, the upper-bound IoU
threshold to assign negative labels to anchors.
rpn_batch_size_per_im: `int` number of anchors sampled per image.
rpn_fg_fraction: `float` desired fraction of positive anchors in a batch.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
skip_crowd_during_training: `bool`, if True, skip annotations whose
`is_crowd` is set to 1.
max_num_instances: `int` number of maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`.
include_mask: a bool to indicate whether parse mask groundtruth.
mask_crop_size: the size which groundtruth mask is cropped to.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
or prediction with groundtruths in the outputs.
has_centerness: whether to create centerness targets.
rpn_center_match_iou_threshold: IoU threshold for valid centerness
samples, set to 0.3 by default.
rpn_center_unmatched_iou_threshold: IoU threshold for invalid centerness
samples, set to 0.1 by default.
rpn_num_center_samples_per_im: number of centerness samples per image,
256 by default.
class_agnostic: whether to merge class ids into one foreground (=1)
class, False by default.
train_class: 'all' or 'voc' or 'nonvoc', 'all' by default.
"""
super(Parser, self).__init__(
output_size=output_size,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size,
rpn_match_threshold=rpn_match_threshold,
rpn_unmatched_threshold=rpn_unmatched_threshold,
rpn_batch_size_per_im=rpn_batch_size_per_im,
rpn_fg_fraction=rpn_fg_fraction,
aug_rand_hflip=aug_rand_hflip,
aug_scale_min=aug_scale_min,
aug_scale_max=aug_scale_max,
skip_crowd_during_training=skip_crowd_during_training,
max_num_instances=max_num_instances,
include_mask=include_mask,
mask_crop_size=mask_crop_size,
use_bfloat16=use_bfloat16,
mode=mode,)
# Centerness target assigning.
self._has_centerness = has_centerness
self._rpn_center_match_iou_threshold = rpn_center_match_iou_threshold
self._rpn_center_unmatched_iou_threshold = (
rpn_center_unmatched_iou_threshold)
self._rpn_num_center_samples_per_im = rpn_num_center_samples_per_im
# Class manipulation.
self._class_agnostic = class_agnostic
self._train_class = train_class
def _parse_train_data(self, data):
"""Parses data for training.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
image: image tensor that is preprocessed to have normalized values and
shape [output_size[0], output_size[1], 3].
labels: a dictionary of tensors used for training. The following describes
{key: value} pairs in the dictionary.
image_info: a 2D `Tensor` that encodes the information of the image and
the applied preprocessing. It is in the format of
[[original_height, original_width], [scaled_height, scaled_width],
[y_scale, x_scale], [y_offset, x_offset]].
anchor_boxes: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, 4] representing anchor boxes at each level.
rpn_score_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level.
rpn_box_targets: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, anchors_per_location * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
gt_boxes: Groundtruth bounding box annotations. The box is represented
in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
image that is fed to the network. The tensor is padded with -1 to
the fixed dimension [self._max_num_instances, 4].
gt_classes: Groundtruth classes annotations. The tensor is padded
with -1 to the fixed dimension [self._max_num_instances].
gt_masks: Groundtruth masks cropped by the bounding box and
resized to a fixed size determined by mask_crop_size.
"""
classes = data['groundtruth_classes']
boxes = data['groundtruth_boxes']
if self._include_mask:
masks = data['groundtruth_instance_masks']
is_crowds = data['groundtruth_is_crowd']
# Skips annotations with `is_crowd` = True.
if self._skip_crowd_during_training and self._is_training:
num_groundtruths = tf.shape(classes)[0]
with tf.control_dependencies([num_groundtruths, is_crowds]):
indices = tf.cond(
tf.greater(tf.size(is_crowds), 0),
lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
classes = tf.gather(classes, indices)
boxes = tf.gather(boxes, indices)
if self._include_mask:
masks = tf.gather(masks, indices)
# Gets original image and its size.
image = data['image']
image_shape = tf.shape(image)[0:2]
# Normalizes image with mean and std pixel values.
image = input_utils.normalize_image(image)
# Flips image randomly during training.
if self._aug_rand_hflip:
if self._include_mask:
image, boxes, masks = input_utils.random_horizontal_flip(
image, boxes, masks)
else:
image, boxes = input_utils.random_horizontal_flip(
image, boxes)
# Converts boxes from normalized coordinates to pixel coordinates.
# Now the coordinates of boxes are w.r.t. the original image.
boxes = box_utils.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = input_utils.resize_and_crop_image(
image,
self._output_size,
padded_size=input_utils.compute_padded_size(
self._output_size, 2 ** self._max_level),
aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max)
image_height, image_width, _ = image.get_shape().as_list()
# Resizes and crops boxes.
# Now the coordinates of boxes are w.r.t the scaled image.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes(
boxes, image_scale, image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
if self._include_mask:
masks = tf.gather(masks, indices)
# Transfer boxes to the original image space and do normalization.
cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
cropped_boxes = box_utils.normalize_boxes(cropped_boxes, image_shape)
num_masks = tf.shape(masks)[0]
masks = tf.image.crop_and_resize(
tf.expand_dims(masks, axis=-1),
cropped_boxes,
box_indices=tf.range(num_masks, dtype=tf.int32),
crop_size=[self._mask_crop_size, self._mask_crop_size],
method='bilinear')
masks = tf.squeeze(masks, axis=-1)
# Class manipulation.
# Filter out novel split classes from training.
if self._train_class != 'all':
valid_classes = tf.cast(
class_utils.coco_split_class_ids(self._train_class),
dtype=classes.dtype)
match = tf.reduce_any(tf.equal(
tf.expand_dims(valid_classes, 1),
tf.expand_dims(classes, 0)), 0)
# Remove novel-split classes and their boxes.
boxes = tf.gather(boxes, tf.where(match)[:, 0])
classes = tf.gather(classes, tf.where(match)[:, 0])
if self._include_mask:
masks = tf.gather(masks, tf.where(match)[:, 0])
# Assigns anchor targets.
# Note that after the target assignment, box targets are absolute pixel
# offsets w.r.t. the scaled image.
input_anchor = anchor.Anchor(
self._min_level,
self._max_level,
self._num_scales,
self._aspect_ratios,
self._anchor_size,
(image_height, image_width))
anchor_labeler = anchor.OlnAnchorLabeler(
input_anchor,
self._rpn_match_threshold,
self._rpn_unmatched_threshold,
self._rpn_batch_size_per_im,
self._rpn_fg_fraction,
# for centerness target.
self._has_centerness,
self._rpn_center_match_iou_threshold,
self._rpn_center_unmatched_iou_threshold,
self._rpn_num_center_samples_per_im,)
if self._has_centerness:
rpn_score_targets, _, rpn_lrtb_targets, rpn_center_targets = (
anchor_labeler.label_anchors_lrtb(
gt_boxes=boxes,
gt_labels=tf.cast(
tf.expand_dims(classes, axis=-1), dtype=tf.float32)))
else:
rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))
# For the base RPN, use a dummy placeholder for the centerness target.
rpn_center_targets = rpn_score_targets.copy()
# If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16:
image = tf.cast(image, dtype=tf.bfloat16)
inputs = {
'image': image,
'image_info': image_info,
}
# Packs labels for model_fn outputs.
labels = {
'anchor_boxes': input_anchor.multilevel_boxes,
'image_info': image_info,
'rpn_score_targets': rpn_score_targets,
'rpn_box_targets': (rpn_lrtb_targets if self._has_centerness
else rpn_box_targets),
'rpn_center_targets': rpn_center_targets,
}
# If class_agnostic, convert to binary classes.
if self._class_agnostic:
classes = tf.where(tf.greater(classes, 0),
tf.ones_like(classes),
tf.zeros_like(classes))
inputs['gt_boxes'] = input_utils.pad_to_fixed_size(boxes,
self._max_num_instances,
-1)
inputs['gt_classes'] = input_utils.pad_to_fixed_size(
classes, self._max_num_instances, -1)
if self._include_mask:
inputs['gt_masks'] = input_utils.pad_to_fixed_size(
masks, self._max_num_instances, -1)
return inputs, labels
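The train-class filter in _parse_train_data reduces to a tensor membership test; a standalone illustration (pure TF; in the real code the valid ids come from class_utils.coco_split_class_ids, the ids below are made up):

import tensorflow as tf

classes = tf.constant([1, 3, 17, 62], dtype=tf.int64)
boxes = tf.random.uniform([4, 4])
valid_classes = tf.constant([1, 62], dtype=tf.int64)  # hypothetical split ids

# For each class, test membership in valid_classes via broadcasting.
match = tf.reduce_any(
    tf.equal(tf.expand_dims(valid_classes, 1), tf.expand_dims(classes, 0)), 0)
keep = tf.where(match)[:, 0]
boxes = tf.gather(boxes, keep)
classes = tf.gather(classes, keep)
print(classes.numpy())  # [ 1 62]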
@@ -366,6 +366,156 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
return decoded_boxes
def encode_boxes_lrtb(boxes, anchors, weights=None):
"""Encode boxes to targets on lrtb (=left,right,top,bottom) format.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
Returns:
encoded_boxes_lrtb: a tensor whose shape is the same as `boxes` representing
the encoded box targets. The box targets encode the left, right, top,
bottom distances from an anchor location to the four borders of the
matched groundtruth bounding box.
center_targets: centerness targets defined by the left, right, top, and
bottom distance targets. The centerness is defined as the deviation of the
anchor location from the groundtruth object center. Formally, centerness =
sqrt(min(left, right)/max(left, right)*min(top, bottom)/max(top, bottom)).
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if boxes.shape[-1] != 4:
raise ValueError(
'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
with tf.name_scope('encode_boxes_lrtb'):
boxes = tf.cast(boxes, dtype=anchors.dtype)
ymin = boxes[..., 0:1]
xmin = boxes[..., 1:2]
ymax = boxes[..., 2:3]
xmax = boxes[..., 3:4]
# box_h = ymax - ymin + 1.0
# box_w = xmax - xmin + 1.0
box_h = ymax - ymin
box_w = xmax - xmin
anchor_ymin = anchors[..., 0:1]
anchor_xmin = anchors[..., 1:2]
anchor_ymax = anchors[..., 2:3]
anchor_xmax = anchors[..., 3:4]
# anchor_h = anchor_ymax - anchor_ymin + 1.0
# anchor_w = anchor_xmax - anchor_xmin + 1.0
anchor_h = anchor_ymax - anchor_ymin
anchor_w = anchor_xmax - anchor_xmin
anchor_yc = anchor_ymin + 0.5 * anchor_h
anchor_xc = anchor_xmin + 0.5 * anchor_w
box_h += EPSILON
box_w += EPSILON
anchor_h += EPSILON
anchor_w += EPSILON
left = (anchor_xc - xmin) / anchor_w
right = (xmax - anchor_xc) / anchor_w
top = (anchor_yc - ymin) / anchor_h
bottom = (ymax - anchor_yc) / anchor_h
# Create centerness target.
lrtb_targets = tf.concat([left, right, top, bottom], axis=-1)
valid_match = tf.greater(tf.reduce_min(lrtb_targets, -1), 0.0)
# Centerness score.
left_right = tf.concat([left, right], axis=-1)
left_right = tf.where(tf.stack([valid_match, valid_match], -1),
left_right, tf.zeros_like(left_right))
top_bottom = tf.concat([top, bottom], axis=-1)
top_bottom = tf.where(tf.stack([valid_match, valid_match], -1),
top_bottom, tf.zeros_like(top_bottom))
center_targets = tf.sqrt(
(tf.reduce_min(left_right, -1) /
(tf.reduce_max(left_right, -1) + EPSILON)) *
(tf.reduce_min(top_bottom, -1) /
(tf.reduce_max(top_bottom, -1) + EPSILON)))
center_targets = tf.where(valid_match,
center_targets,
tf.zeros_like(center_targets))
if weights:
left *= weights[0]
right *= weights[1]
top *= weights[2]
bottom *= weights[3]
encoded_boxes_lrtb = tf.concat(
[left, right, top, bottom],
axis=-1)
return encoded_boxes_lrtb, center_targets
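A worked numeric check of the centerness formula above (the anchor-width/height normalization cancels inside the min/max ratios, so raw pixel distances suffice; the numbers are illustrative):

import math

# Anchor center at (y, x) = (50, 50); groundtruth box [0, 0, 100, 200].
left, right = 50.0, 150.0   # distances from the anchor center to x-borders
top, bottom = 50.0, 50.0    # distances from the anchor center to y-borders
centerness = math.sqrt(
    (min(left, right) / max(left, right)) *
    (min(top, bottom) / max(top, bottom)))
print(centerness)  # sqrt((50/150) * (50/50)) = 0.577...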
def decode_boxes_lrtb(encoded_boxes_lrtb, anchors, weights=None):
"""Decode boxes.
Args:
encoded_boxes_lrtb: a tensor whose last dimension is 4 representing the
coordinates of encoded boxes in left, right, top, bottom order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
Returns:
decoded_boxes_lrtb: a tensor whose shape is the same as
`encoded_boxes_lrtb`, containing the decoded boxes in ymin, xmin, ymax,
xmax order. The boxes are recovered from the encoded left, right, top,
and bottom distances between each anchor center and the four borders of
its matched groundtruth bounding box.
"""
if encoded_boxes_lrtb.shape[-1] != 4:
raise ValueError(
'encoded_boxes_lrtb.shape[-1] is {:d}, but must be 4.'
.format(encoded_boxes_lrtb.shape[-1]))
with tf.name_scope('decode_boxes_lrtb'):
encoded_boxes_lrtb = tf.cast(encoded_boxes_lrtb, dtype=anchors.dtype)
left = encoded_boxes_lrtb[..., 0:1]
right = encoded_boxes_lrtb[..., 1:2]
top = encoded_boxes_lrtb[..., 2:3]
bottom = encoded_boxes_lrtb[..., 3:4]
if weights:
left /= weights[0]
right /= weights[1]
top /= weights[2]
bottom /= weights[3]
anchor_ymin = anchors[..., 0:1]
anchor_xmin = anchors[..., 1:2]
anchor_ymax = anchors[..., 2:3]
anchor_xmax = anchors[..., 3:4]
anchor_h = anchor_ymax - anchor_ymin
anchor_w = anchor_xmax - anchor_xmin
anchor_yc = anchor_ymin + 0.5 * anchor_h
anchor_xc = anchor_xmin + 0.5 * anchor_w
anchor_h += EPSILON
anchor_w += EPSILON
decoded_boxes_ymin = anchor_yc - top * anchor_h
decoded_boxes_xmin = anchor_xc - left * anchor_w
decoded_boxes_ymax = anchor_yc + bottom * anchor_h
decoded_boxes_xmax = anchor_xc + right * anchor_w
decoded_boxes_lrtb = tf.concat(
[decoded_boxes_ymin, decoded_boxes_xmin,
decoded_boxes_ymax, decoded_boxes_xmax],
axis=-1)
return decoded_boxes_lrtb
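decode_boxes_lrtb inverts encode_boxes_lrtb whenever the anchor center lies inside the groundtruth box; a round-trip sketch (assuming this module is importable as official.vision.detection.utils.box_utils):

import tensorflow as tf
from official.vision.detection.utils import box_utils

boxes = tf.constant([[10., 20., 110., 220.]])   # [ymin, xmin, ymax, xmax]
anchors = tf.constant([[40., 60., 80., 120.]])  # center (60, 90) lies inside

lrtb, centerness = box_utils.encode_boxes_lrtb(
    boxes, anchors, weights=[1.0, 1.0, 1.0, 1.0])
decoded = box_utils.decode_boxes_lrtb(
    lrtb, anchors, weights=[1.0, 1.0, 1.0, 1.0])
# decoded matches boxes up to EPSILON, and 0 < centerness <= 1 here.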
def filter_boxes(boxes, scores, image_shape, min_size_threshold):
"""Filter and remove boxes that are too small or fall outside the image.
...
@@ -315,3 +315,209 @@ class TargetAssigner(object):
BoxCoder object.
"""
return self._box_coder
class OlnTargetAssigner(TargetAssigner):
"""Target assigner to compute classification and regression targets."""
def __init__(self,
similarity_calc,
matcher,
box_coder,
negative_class_weight=1.0,
unmatched_cls_target=None,
center_matcher=None):
"""Construct Object Detection Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: Matcher used to match groundtruth to anchors.
box_coder: BoxCoder used to encode matching groundtruth boxes with respect
to anchors.
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0). The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each anchor (and
can be empty for scalar targets). This shape must thus be compatible
with the groundtruth labels that are passed to the "assign" function
(which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If set to None,
unmatched_cls_target is set to be [0] for each anchor.
center_matcher: Matcher used to match groundtruth to anchors to sample and
assign the regression targets of centerness to each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
super(OlnTargetAssigner, self).__init__(
similarity_calc=similarity_calc,
matcher=matcher,
box_coder=box_coder,
negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target)
# centerness-matcher with independent sampling IoU threshold.
self._center_matcher = center_matcher
def assign(self,
anchors,
groundtruth_boxes,
groundtruth_labels=None,
groundtruth_weights=None,
**params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
to groundtruth_boxes and assign classification and regression targets to
each anchor as well as weights based on the resulting match (specifying,
e.g., which anchors should not contribute to training loss).
Anchors that are not matched to anything are given a classification target
of self._unmatched_cls_target which can be specified via the constructor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for
each of the ground_truth boxes. The subshape [d_1, ... d_k] can be empty
(corresponding to scalar inputs). When set to None, groundtruth_labels
assumes a binary problem where all ground_truth boxes get a positive
label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of the
Matcher.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
groundtruth boxes, with rows corresponding to groundtruth boxes
and columns corresponding to anchors.
matched_gt_boxlist: a BoxList object with data of float32 tensor with
shape [num_anchors, box_dimension] which encodes the coordinates of the
matched groundtruth boxes.
matched_anchors_mask: a Bool tensor with shape [num_anchors] which
indicates whether an anchor is matched or not.
center_matched_gt_boxlist: a BoxList object with data of float32 tensor
with shape [num_anchors, box_dimension] which encodes the coordinates of
the groundtruth boxes matched for centerness target assignment.
center_matched_anchors_mask: a Boolean tensor with shape [num_anchors]
which indicates whether an anchor is matched or not for centerness
target assignment.
matched_ious: a float32 tensor with shape [num_anchors] which encodes the
ious between each anchor and the matched groundtruth boxes.
Raises:
ValueError: if anchors or groundtruth_boxes are not of type
box_list.BoxList
"""
if not isinstance(anchors, box_list.BoxList):
raise ValueError('anchors must be a BoxList')
if not isinstance(groundtruth_boxes, box_list.BoxList):
raise ValueError('groundtruth_boxes must be a BoxList')
if groundtruth_labels is None:
groundtruth_labels = tf.ones(
tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
unmatched_shape_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
shape_utils.combined_static_and_dynamic_shape(
self._unmatched_cls_target))
labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[:1],
shape_utils.combined_static_and_dynamic_shape(
groundtruth_boxes.get())[:1])
if groundtruth_weights is None:
num_gt_boxes = groundtruth_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = groundtruth_boxes.num_boxes()
groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
with tf.control_dependencies(
[unmatched_shape_assert, labels_and_box_shapes_assert]):
match_quality_matrix = self._similarity_calc(
groundtruth_boxes.get(), anchors.get())
match = self._matcher.match(match_quality_matrix, **params)
reg_targets, matched_gt_boxlist, matched_anchors_mask = (
self._create_regression_targets(anchors,
groundtruth_boxes,
match))
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match, groundtruth_weights)
cls_weights = self._create_classification_weights(match,
groundtruth_weights)
# Match for creation of centerness regression targets.
if self._center_matcher is not None:
center_match = self._center_matcher.match(
match_quality_matrix, **params)
center_matched_gt_boxes = center_match.gather_based_on_match(
groundtruth_boxes.get(),
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
center_matched_gt_boxlist = box_list.BoxList(center_matched_gt_boxes)
center_matched_anchors_mask = center_match.matched_column_indicator()
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
if self._center_matcher is not None:
matched_ious = tf.reduce_max(match_quality_matrix, 0)
return (cls_targets, cls_weights, reg_targets, reg_weights, match,
matched_gt_boxlist, matched_anchors_mask,
center_matched_gt_boxlist, center_matched_anchors_mask,
matched_ious)
else:
return (cls_targets, cls_weights, reg_targets, reg_weights, match)
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
"""Returns a regression target for each anchor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth_boxes
match: a matcher.Match object
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_gt_boxes = match.gather_based_on_match(
groundtruth_boxes.get(),
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
matched_keypoints = match.gather_based_on_match(
groundtruth_keypoints,
unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
match_results_shape = shape_utils.combined_static_and_dynamic_shape(
match.match_results)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets = tf.tile(self._default_regression_target(),
[match_results_shape[0], 1])
matched_anchors_mask = match.matched_column_indicator()
# To broadcast matched_anchors_mask to the same shape as
# matched_reg_targets.
matched_anchors_mask_tiled = tf.tile(
tf.expand_dims(matched_anchors_mask, 1),
[1, tf.shape(matched_reg_targets)[1]])
reg_targets = tf.where(matched_anchors_mask_tiled,
matched_reg_targets,
unmatched_ignored_reg_targets)
return reg_targets, matched_gt_boxlist, matched_anchors_mask
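A hedged construction sketch mirroring how OlnAnchorLabeler.__init__ earlier in this commit wires the assigner together (imports assume the official.vision.detection package layout; thresholds and boxes are illustrative):

import tensorflow as tf
from official.vision import keras_cv
from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import box_list
from official.vision.detection.utils.object_detection import faster_rcnn_box_coder
from official.vision.detection.utils.object_detection import target_assigner

matcher = argmax_matcher.ArgMaxMatcher(
    0.7, unmatched_threshold=0.3,
    negatives_lower_than_unmatched=True, force_match_for_each_row=True)
center_matcher = argmax_matcher.ArgMaxMatcher(
    0.3, unmatched_threshold=0.3,
    negatives_lower_than_unmatched=True, force_match_for_each_row=True)
assigner = target_assigner.OlnTargetAssigner(
    keras_cv.ops.IouSimilarity(), matcher,
    faster_rcnn_box_coder.FasterRcnnBoxCoder(),
    center_matcher=center_matcher)

anchors = box_list.BoxList(tf.constant([[0., 0., 64., 64.]]))
gt_boxes = box_list.BoxList(tf.constant([[8., 8., 56., 56.]]))
# With center_matcher set, assign() returns the 10-tuple documented above.
outputs = assigner.assign(anchors, gt_boxes)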