Commit d4f37e87 authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 344335367
parent 51f4ecad
@@ -16,6 +16,7 @@
from official.modeling.hyperparams import params_dict
from official.vision.detection.configs import maskrcnn_config
from official.vision.detection.configs import olnmask_config
from official.vision.detection.configs import retinanet_config
from official.vision.detection.configs import shapemask_config
@@ -28,6 +29,9 @@ def config_generator(model):
elif model == 'mask_rcnn':
default_config = maskrcnn_config.MASKRCNN_CFG
restrictions = maskrcnn_config.MASKRCNN_RESTRICTIONS
elif model == 'olnmask':
default_config = olnmask_config.OLNMASK_CFG
restrictions = olnmask_config.OLNMASK_RESTRICTIONS
elif model == 'shapemask':
default_config = shapemask_config.SHAPEMASK_CFG
restrictions = shapemask_config.SHAPEMASK_RESTRICTIONS
...
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Config template to train Object Localization Network (OLN)."""
from official.modeling.hyperparams import params_dict
from official.vision.detection.configs import base_config
# pylint: disable=line-too-long
OLNMASK_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
OLNMASK_CFG.override({
'type': 'olnmask',
'eval': {
'type': 'oln_xclass_box',
'use_category': False,
'seen_class': 'voc',
'num_images_to_visualize': 0,
},
'architecture': {
'parser': 'olnmask_parser',
'min_level': 2,
'max_level': 6,
'include_rpn_class': False,
'include_frcnn_class': False,
'include_frcnn_box': True,
'include_mask': False,
'mask_target_size': 28,
'num_classes': 2,
},
'olnmask_parser': {
'output_size': [640, 640],
'num_channels': 3,
'rpn_match_threshold': 0.7,
'rpn_unmatched_threshold': 0.3,
'rpn_batch_size_per_im': 256,
'rpn_fg_fraction': 0.5,
'aug_rand_hflip': True,
'aug_scale_min': 0.5,
'aug_scale_max': 2.0,
'skip_crowd_during_training': True,
'max_num_instances': 100,
'mask_crop_size': 112,
# centerness targets.
'has_centerness': True,
'rpn_center_match_iou_threshold': 0.3,
'rpn_center_unmatched_iou_threshold': 0.1,
'rpn_num_center_samples_per_im': 256,
# class manipulation.
'class_agnostic': True,
'train_class': 'voc',
},
'anchor': {
'num_scales': 1,
'aspect_ratios': [1.0],
'anchor_size': 8,
},
'rpn_head': {
'num_convs': 2,
'num_filters': 256,
'use_separable_conv': False,
'use_batch_norm': False,
# RPN-Centerness learning.
'has_centerness': True,
},
'frcnn_head': {
'num_convs': 0,
'num_filters': 256,
'use_separable_conv': False,
'num_fcs': 2,
'fc_dims': 1024,
'use_batch_norm': False,
'has_scoring': True,
},
'mrcnn_head': {
'num_convs': 4,
'num_filters': 256,
'use_separable_conv': False,
'use_batch_norm': False,
'has_scoring': False,
},
'rpn_score_loss': {
'rpn_batch_size_per_im': 256,
},
'rpn_box_loss': {
'huber_loss_delta': 1.0 / 9.0,
},
'frcnn_box_loss': {
'huber_loss_delta': 1.0,
},
'frcnn_box_score_loss': {
'ignore_threshold': 0.3,
},
'roi_proposal': {
'rpn_pre_nms_top_k': 2000,
'rpn_post_nms_top_k': 2000,
'rpn_nms_threshold': 0.7,
'rpn_score_threshold': 0.0,
'rpn_min_size_threshold': 0.0,
'test_rpn_pre_nms_top_k': 2000,
'test_rpn_post_nms_top_k': 2000,
'test_rpn_nms_threshold': 0.7,
'test_rpn_score_threshold': 0.0,
'test_rpn_min_size_threshold': 0.0,
'use_batched_nms': False,
},
'roi_sampling': {
'num_samples_per_image': 512,
'fg_fraction': 0.25,
'fg_iou_thresh': 0.5,
'bg_iou_thresh_hi': 0.5,
'bg_iou_thresh_lo': 0.0,
'mix_gt_boxes': True,
},
'mask_sampling': {
'num_mask_samples_per_image': 128, # Typically = `num_samples_per_image` * `fg_fraction`.
},
'postprocess': {
'use_batched_nms': False,
'max_total_size': 100,
'nms_iou_threshold': 0.5,
'score_threshold': 0.00,
'pre_nms_num_boxes': 2000,
},
}, is_strict=False)
OLNMASK_RESTRICTIONS = [
# 'anchor.aspect_ratios == [1.0]',
# 'anchor.scales == 1',
]
# pylint: enable=line-too-long
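# Illustrative only (not part of this commit): a minimal sketch of consuming
# the OLN-Mask template through the config factory edited above. The factory
# module name and the override values are assumptions, not taken from the diff.
from official.vision.detection.configs import factory  # hypothetical module name

params = factory.config_generator('olnmask')
params.override({
    'architecture': {'include_mask': True},  # e.g. turn the mask branch on
}, is_strict=True)
params.validate()  # enforces OLNMASK_RESTRICTIONS (currently all commented out)
params.lock()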
@@ -407,3 +407,89 @@ class GenericDetectionGenerator(object):
nmsed_classes += 1
return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
class OlnDetectionGenerator(GenericDetectionGenerator):
"""Generates the final detected boxes with scores and classes."""
def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape,
is_single_fg_score=False, keep_nms=True):
"""Generate final detections for Object Localization Network (OLN).
Args:
box_outputs: a tensor of shape of [batch_size, K, num_classes * 4]
representing the class-specific box coordinates relative to anchors.
class_outputs: a tensor of shape of [batch_size, K, num_classes]
representing the class logits before applying score activation.
anchor_boxes: a tensor of shape of [batch_size, K, 4] representing the
corresponding anchor boxes w.r.t `box_outputs`.
image_shape: a tensor of shape of [batch_size, 2] storing the image height
and width w.r.t. the scaled image, i.e. the same image space as
`box_outputs` and `anchor_boxes`.
is_single_fg_score: a bool indicating whether class_outputs contains only a
single foreground score per box (True) or the background scores
concatenated as well (False). By default, class_outputs is a concatenation
of the foreground and background scores, i.e. is_single_fg_score=False.
keep_nms: a bool indicating whether to perform NMS.
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
representing top detected boxes in [y1, x1, y2, x2].
nms_scores: `float` Tensor of shape [batch_size, max_total_size]
representing sorted confidence scores for detected boxes. The values are
between [0, 1].
nms_classes: `int` Tensor of shape [batch_size, max_total_size]
representing classes for detected boxes.
valid_detections: `int` Tensor of shape [batch_size]. Only the top
`valid_detections` boxes are valid detections.
"""
if is_single_fg_score:
# Concatenates dummy background scores.
dummy_bg_scores = tf.zeros_like(class_outputs)
class_outputs = tf.stack([dummy_bg_scores, class_outputs], -1)
else:
class_outputs = tf.nn.softmax(class_outputs, axis=-1)
# Removes the background class.
class_outputs_shape = tf.shape(class_outputs)
batch_size = class_outputs_shape[0]
num_locations = class_outputs_shape[1]
num_classes = class_outputs_shape[-1]
num_detections = num_locations * (num_classes - 1)
class_outputs = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1])
box_outputs = tf.reshape(
box_outputs,
tf.stack([batch_size, num_locations, num_classes, 4], axis=-1))
box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
anchor_boxes = tf.tile(
tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
box_outputs = tf.reshape(box_outputs,
tf.stack([batch_size, num_detections, 4], axis=-1))
anchor_boxes = tf.reshape(
anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))
# Box decoding. For RPN outputs, box_outputs are all zeros.
decoded_boxes = box_utils.decode_boxes(
box_outputs, anchor_boxes, weights=[10.0, 10.0, 5.0, 5.0])
# Box clipping
decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)
decoded_boxes = tf.reshape(
decoded_boxes,
tf.stack([batch_size, num_locations, num_classes - 1, 4], axis=-1))
if keep_nms:
nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
self._generate_detections(decoded_boxes, class_outputs))
# Adds 1 to offset the background class which has index 0.
nmsed_classes += 1
else:
nmsed_boxes = decoded_boxes[:, :, 0, :]
nmsed_scores = class_outputs[:, :, 0]
nmsed_classes = tf.cast(tf.ones_like(nmsed_scores), tf.int32)
valid_detections = tf.cast(
tf.reduce_sum(tf.ones_like(nmsed_scores), axis=-1), tf.int32)
return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
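# Illustrative only (not part of this commit): how the is_single_fg_score
# branch above handles a single class-agnostic score per box (assumed shape
# [batch_size, K] in that mode). A dummy background column is stacked in front
# of the foreground score and later sliced away, so NMS ranks boxes purely by
# objectness.
import tensorflow as tf

fg_scores = tf.constant([[0.9, 0.2, 0.4]])           # [batch=1, K=3]
dummy_bg = tf.zeros_like(fg_scores)
class_outputs = tf.stack([dummy_bg, fg_scores], -1)  # [1, 3, 2]; [..., 0] = bg
fg_only = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1])  # [1, 3, 1]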
@@ -231,3 +231,237 @@ class ROIGenerator(object):
use_batched_nms=self._use_batched_nms,
apply_sigmoid_to_score=True)
return proposed_rois, proposed_roi_scores
class OlnROIGenerator(ROIGenerator):
"""Proposes RoIs for the second stage processing."""
def __call__(self, boxes, scores, anchor_boxes, image_shape, is_training,
is_box_lrtb=False, object_scores=None):
"""Generates RoI proposals.
Args:
boxes: a dict with keys representing FPN levels and values representing
box tensors of shape [batch_size, feature_h, feature_w, num_anchors * 4].
scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape [batch_size, feature_h,
feature_w, num_anchors * 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension
are [height, width] of the scaled image.
is_training: a bool indicating whether it is in training or inference
mode.
is_box_lrtb: a bool indicating whether boxes are in lrtb (left, right, top,
bottom) format.
object_scores: another objectness score (e.g., centerness). In OLN, we use
object_scores=centerness as a replacement of the scores at each level.
A dict with keys representing FPN levels and values representing logit
tensors of shape [batch_size, feature_h, feature_w, num_anchors].
Returns:
proposed_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4],
representing the box coordinates of the proposed RoIs w.r.t. the
scaled image.
proposed_roi_scores: a tensor of shape
[batch_size, rpn_post_nms_top_k, 1], representing the scores of the
proposed RoIs.
"""
proposed_rois, proposed_roi_scores = self.oln_multilevel_propose_rois(
boxes,
scores,
anchor_boxes,
image_shape,
rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k
if is_training else self._test_rpn_pre_nms_top_k),
rpn_post_nms_top_k=(self._rpn_post_nms_top_k
if is_training else self._test_rpn_post_nms_top_k),
rpn_nms_threshold=(self._rpn_nms_threshold
if is_training else self._test_rpn_nms_threshold),
rpn_score_threshold=(self._rpn_score_threshold if is_training else
self._test_rpn_score_threshold),
rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training else
self._test_rpn_min_size_threshold),
decode_boxes=True,
clip_boxes=True,
use_batched_nms=self._use_batched_nms,
apply_sigmoid_to_score=True,
is_box_lrtb=is_box_lrtb,
rpn_object_scores=object_scores,)
return proposed_rois, proposed_roi_scores
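# Illustrative only (not part of this commit): a hedged sketch of wiring up the
# OLN ROI generator, assuming OlnROIGenerator keeps ROIGenerator's constructor
# (which reads the `roi_proposal` ParamsDict) and that the rpn_* and anchor
# dicts are produced elsewhere in the model; those names are placeholders.
roi_generator = OlnROIGenerator(OLNMASK_CFG.roi_proposal)
rois, roi_scores = roi_generator(
    rpn_box_outputs,        # dict: level -> [batch, h, w, num_anchors * 4]
    rpn_score_outputs,      # dict: level -> [batch, h, w, num_anchors]
    anchor_boxes,           # dict: level -> [batch, h, w, num_anchors * 4]
    image_shape,            # [batch, 2], (height, width) of the scaled image
    is_training=True,
    is_box_lrtb=False,
    object_scores=rpn_center_outputs)  # centerness replaces the RPN scores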
def oln_multilevel_propose_rois(self,
rpn_boxes,
rpn_scores,
anchor_boxes,
image_shape,
rpn_pre_nms_top_k=2000,
rpn_post_nms_top_k=1000,
rpn_nms_threshold=0.7,
rpn_score_threshold=0.0,
rpn_min_size_threshold=0.0,
decode_boxes=True,
clip_boxes=True,
use_batched_nms=False,
apply_sigmoid_to_score=True,
is_box_lrtb=False,
rpn_object_scores=None,):
"""Proposes RoIs given a group of candidates from different FPN levels.
The following describes the steps:
1. For each individual level:
a. Adjust scores for each level if specified by rpn_object_scores.
b. Apply sigmoid transform if specified.
c. Decode boxes (either of xyhw or left-right-top-bottom format) if
specified.
d. Clip boxes if specified.
e. Filter small boxes and those that fall outside the image if specified.
f. Apply pre-NMS filtering including pre-NMS top k and score
thresholding.
g. Apply NMS.
2. Aggregate post-NMS boxes from each level.
3. Apply an overall top k to generate the final selected RoIs.
Args:
rpn_boxes: a dict with keys representing FPN levels and values
representing box tensors of shape [batch_size, feature_h, feature_w,
num_anchors * 4].
rpn_scores: a dict with keys representing FPN levels and values
representing logit tensors of shape [batch_size, feature_h, feature_w,
num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape [batch_size, feature_h,
feature_w, num_anchors * 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension
are [height, width] of the scaled image.
rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
keep before applying NMS. Default: 2000.
rpn_post_nms_top_k: an integer of top scoring RPN proposals *in total* to
keep after applying NMS. Default: 1000.
rpn_nms_threshold: a float between 0 and 1 representing the IoU threshold
used for NMS. If 0.0, no NMS is applied. Default: 0.7.
rpn_score_threshold: a float between 0 and 1 representing the minimal box
score to keep before applying NMS. This is often used as a pre-filtering
step for better performance. If 0, no filtering is applied. Default: 0.
rpn_min_size_threshold: a float representing the minimal box size in each
side (w.r.t. the scaled image) to keep before applying NMS. This is
often used as a pre-filtering step for better performance. If 0, no
filtering is applied. Default: 0.
decode_boxes: a boolean indicating whether `rpn_boxes` needs to be decoded
using `anchor_boxes`. If False, use `rpn_boxes` directly and ignore
`anchor_boxes`. Default: True.
clip_boxes: a boolean indicating whether boxes are first clipped to the
scaled image size before applying NMS. If False, no clipping is applied
and `image_shape` is ignored. Default: True.
use_batched_nms: a boolean indicating whether NMS is applied in batch
using `tf.image.combined_non_max_suppression`. Currently only available
in CPU/GPU. Default: False.
apply_sigmoid_to_score: a boolean indicating whether to apply sigmoid to
`rpn_scores` before applying NMS. Default: True.
is_box_lrtb: a bool indicating whether boxes are in lrtb (left, right, top,
bottom) format.
rpn_object_scores: a predicted objectness score (e.g., centerness). In
OLN, we use object_scores=centerness as a replacement of the scores at
each level. A dict with keys representing FPN levels and values
representing logit tensors of shape [batch_size, feature_h, feature_w,
num_anchors].
Returns:
selected_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4],
representing the box coordinates of the selected proposals w.r.t. the
scaled image.
selected_roi_scores: a tensor of shape [batch_size, rpn_post_nms_top_k,
1], representing the scores of the selected proposals.
"""
with tf.name_scope('multilevel_propose_rois'):
rois = []
roi_scores = []
image_shape = tf.expand_dims(image_shape, axis=1)
for level in sorted(rpn_scores.keys()):
with tf.name_scope('level_%d' % level):
_, feature_h, feature_w, num_anchors_per_location = (
rpn_scores[level].get_shape().as_list())
num_boxes = feature_h * feature_w * num_anchors_per_location
this_level_scores = tf.reshape(rpn_scores[level], [-1, num_boxes])
this_level_boxes = tf.reshape(rpn_boxes[level], [-1, num_boxes, 4])
this_level_anchors = tf.cast(
tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
dtype=this_level_scores.dtype)
if rpn_object_scores:
this_level_object_scores = rpn_object_scores[level]
this_level_object_scores = tf.reshape(this_level_object_scores,
[-1, num_boxes])
this_level_object_scores = tf.cast(this_level_object_scores,
this_level_scores.dtype)
this_level_scores = this_level_object_scores
if apply_sigmoid_to_score:
this_level_scores = tf.sigmoid(this_level_scores)
if decode_boxes:
if is_box_lrtb: # Box in left-right-top-bottom format.
this_level_boxes = box_utils.decode_boxes_lrtb(
this_level_boxes, this_level_anchors)
else: # Box in standard x-y-h-w format.
this_level_boxes = box_utils.decode_boxes(
this_level_boxes, this_level_anchors)
if clip_boxes:
this_level_boxes = box_utils.clip_boxes(
this_level_boxes, image_shape)
if rpn_min_size_threshold > 0.0:
this_level_boxes, this_level_scores = box_utils.filter_boxes(
this_level_boxes, this_level_scores, image_shape,
rpn_min_size_threshold)
this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k)
this_level_post_nms_top_k = min(num_boxes, rpn_post_nms_top_k)
if rpn_nms_threshold > 0.0:
if use_batched_nms:
this_level_rois, this_level_roi_scores, _, _ = (
tf.image.combined_non_max_suppression(
tf.expand_dims(this_level_boxes, axis=2),
tf.expand_dims(this_level_scores, axis=-1),
max_output_size_per_class=this_level_pre_nms_top_k,
max_total_size=this_level_post_nms_top_k,
iou_threshold=rpn_nms_threshold,
score_threshold=rpn_score_threshold,
pad_per_class=False,
clip_boxes=False))
else:
if rpn_score_threshold > 0.0:
this_level_boxes, this_level_scores = (
box_utils.filter_boxes_by_scores(this_level_boxes,
this_level_scores,
rpn_score_threshold))
this_level_boxes, this_level_scores = box_utils.top_k_boxes(
this_level_boxes, this_level_scores,
k=this_level_pre_nms_top_k)
this_level_roi_scores, this_level_rois = (
nms.sorted_non_max_suppression_padded(
this_level_scores,
this_level_boxes,
max_output_size=this_level_post_nms_top_k,
iou_threshold=rpn_nms_threshold))
else:
this_level_rois, this_level_roi_scores = box_utils.top_k_boxes(
this_level_boxes, this_level_scores, k=this_level_post_nms_top_k)
rois.append(this_level_rois)
roi_scores.append(this_level_roi_scores)
all_rois = tf.concat(rois, axis=1)
all_roi_scores = tf.concat(roi_scores, axis=1)
with tf.name_scope('top_k_rois'):
_, num_valid_rois = all_roi_scores.get_shape().as_list()
overall_top_k = min(num_valid_rois, rpn_post_nms_top_k)
selected_rois, selected_roi_scores = box_utils.top_k_boxes(
all_rois, all_roi_scores, k=overall_top_k)
return selected_rois, selected_roi_scores
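# Illustrative only (not part of this commit): the per-FPN-level dict layout
# the routine above consumes. With the OLN anchor config (num_scales=1,
# aspect_ratios=[1.0]) there is one anchor per location; the shapes below
# assume a 640x640 input with min_level=2 and max_level=6.
import tensorflow as tf

batch = 2
levels = range(2, 7)
rpn_scores = {l: tf.random.normal([batch, 640 // 2**l, 640 // 2**l, 1]) for l in levels}
rpn_boxes = {l: tf.random.normal([batch, 640 // 2**l, 640 // 2**l, 4]) for l in levels}
anchor_boxes = {l: tf.zeros([batch, 640 // 2**l, 640 // 2**l, 4]) for l in levels}
centerness = {l: tf.random.normal([batch, 640 // 2**l, 640 // 2**l, 1]) for l in levels}
# Passing rpn_object_scores=centerness makes step 1.a replace the RPN
# classification logits with the centerness logits before sigmoid, filtering
# and NMS.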
@@ -342,6 +342,180 @@ class ROISampler(object):
sampled_gt_indices)
class ROIScoreSampler(ROISampler):
"""Samples RoIs, RoI-scores and creates training targets."""
def __call__(self, rois, roi_scores, gt_boxes, gt_classes):
"""Sample and assign RoIs for training.
Args:
rois: a tensor of shape of [batch_size, N, 4]. N is the number of
proposals before groundtruth assignment. The last dimension is the box
coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
roi_scores: a tensor of shape of [batch_size, N], representing the scores
of the proposed RoIs (e.g. RPN classification or centerness scores).
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
coordinates of gt_boxes are in the pixel coordinates of the scaled
image. This tensor might have padding of values -1 indicating the
invalid box coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
tensor might have paddings with values of -1 indicating the invalid
classes.
Returns:
sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
coordinates of the sampled RoIs, where K is the number of the sampled
RoIs, i.e. K = num_samples_per_image.
sampled_roi_scores: a tensor of shape of [batch_size, K], representing the
scores of the sampled RoIs.
sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
box coordinates of the matched groundtruth boxes of the sampled RoIs.
sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
classes of the matched groundtruth boxes of the sampled RoIs.
"""
(sampled_rois, sampled_roi_scores, sampled_gt_boxes, sampled_gt_classes,
sampled_gt_indices) = (
self.assign_and_sample_proposals_and_scores(
rois,
roi_scores,
gt_boxes,
gt_classes,
num_samples_per_image=self._num_samples_per_image,
mix_gt_boxes=self._mix_gt_boxes,
fg_fraction=self._fg_fraction,
fg_iou_thresh=self._fg_iou_thresh,
bg_iou_thresh_hi=self._bg_iou_thresh_hi,
bg_iou_thresh_lo=self._bg_iou_thresh_lo))
return (sampled_rois, sampled_roi_scores, sampled_gt_boxes,
sampled_gt_classes, sampled_gt_indices)
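# Illustrative only (not part of this commit): a hedged sketch of sampling RoIs
# together with their objectness scores, assuming ROIScoreSampler keeps
# ROISampler's constructor (which reads the `roi_sampling` ParamsDict); the
# input tensors are placeholders produced earlier in the pipeline.
roi_sampler = ROIScoreSampler(OLNMASK_CFG.roi_sampling)
(rois, roi_scores, gt_box_targets, gt_class_targets, gt_indices) = roi_sampler(
    proposed_rois,          # [batch, N, 4]
    proposed_roi_scores,    # [batch, N]
    gt_boxes,               # [batch, MAX_NUM_INSTANCES, 4], padded with -1
    gt_classes)             # [batch, MAX_NUM_INSTANCES], padded with -1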
def assign_and_sample_proposals_and_scores(self,
proposed_boxes,
proposed_scores,
gt_boxes,
gt_classes,
num_samples_per_image=512,
mix_gt_boxes=True,
fg_fraction=0.25,
fg_iou_thresh=0.5,
bg_iou_thresh_hi=0.5,
bg_iou_thresh_lo=0.0):
"""Assigns the proposals with groundtruth classes and performs subsmpling.
Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses the
following algorithm to generate the final `num_samples_per_image` RoIs.
1. Calculates the IoU between each proposal box and each gt_boxes.
2. Assigns each proposed box with a groundtruth class and box by choosing
the largest IoU overlap.
3. Samples `num_samples_per_image` boxes from all proposed boxes, and
returns box_targets, class_targets, and RoIs.
Args:
proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
of proposals before groundtruth assignment. The last dimension is the
box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
format.
proposed_scores: a tensor of shape of [batch_size, N]. N is the number of
proposals before groundtruth assignment. It is the rpn scores for all
proposed boxes which can be either their classification or centerness
scores.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
coordinates of gt_boxes are in the pixel coordinates of the scaled
image. This tensor might have padding of values -1 indicating the
invalid box coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
tensor might have paddings with values of -1 indicating the invalid
classes.
num_samples_per_image: an integer representing the RoI minibatch size per
image.
mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes
before sampling proposals.
fg_fraction: a float representing the target fraction of the RoI minibatch
that is labeled foreground (i.e., class > 0).
fg_iou_thresh: a float representing the IoU overlap threshold for an RoI to
be considered foreground (if >= fg_iou_thresh).
bg_iou_thresh_hi: a float representing the IoU overlap threshold for an RoI
to be considered background (class = 0 if overlap in [LO, HI)).
bg_iou_thresh_lo: a float representing the IoU overlap threshold for an RoI
to be considered background (class = 0 if overlap in [LO, HI)).
Returns:
sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
coordinates of the sampled RoIs, where K is the number of the sampled
RoIs, i.e. K = num_samples_per_image.
sampled_scores: a tensor of shape of [batch_size, K], representing the
confidence score of the sampled RoIs, where K is the number of the
sampled RoIs, i.e. K = num_samples_per_image.
sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
box coordinates of the matched groundtruth boxes of the sampled RoIs.
sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
classes of the matched groundtruth boxes of the sampled RoIs.
sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
indices of the sampled groundtruth boxes in the original `gt_boxes`
tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] =
sampled_gt_boxes[:, i].
"""
with tf.name_scope('sample_proposals_and_scores'):
if mix_gt_boxes:
boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
gt_scores = tf.ones_like(gt_boxes[:, :, 0])
scores = tf.concat([proposed_scores, gt_scores], axis=1)
else:
boxes = proposed_boxes
scores = proposed_scores
(matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
_) = box_matching(boxes, gt_boxes, gt_classes)
positive_match = tf.greater(matched_iou, fg_iou_thresh)
negative_match = tf.logical_and(
tf.greater_equal(matched_iou, bg_iou_thresh_lo),
tf.less(matched_iou, bg_iou_thresh_hi))
ignored_match = tf.less(matched_iou, 0.0)
# re-assign negatively matched boxes to the background class.
matched_gt_classes = tf.where(negative_match,
tf.zeros_like(matched_gt_classes),
matched_gt_classes)
matched_gt_indices = tf.where(negative_match,
tf.zeros_like(matched_gt_indices),
matched_gt_indices)
sample_candidates = tf.logical_and(
tf.logical_or(positive_match, negative_match),
tf.logical_not(ignored_match))
sampler = (
balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
positive_fraction=fg_fraction, is_static=True))
batch_size, _ = sample_candidates.get_shape().as_list()
sampled_indicators = []
for i in range(batch_size):
sampled_indicator = sampler.subsample(sample_candidates[i],
num_samples_per_image,
positive_match[i])
sampled_indicators.append(sampled_indicator)
sampled_indicators = tf.stack(sampled_indicators)
_, sampled_indices = tf.nn.top_k(
tf.cast(sampled_indicators, dtype=tf.int32),
k=num_samples_per_image,
sorted=True)
sampled_indices_shape = tf.shape(sampled_indices)
batch_indices = (
tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)
sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
sampled_roi_scores = tf.gather_nd(scores, gather_nd_indices)
sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_nd_indices)
sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_nd_indices)
return (sampled_rois, sampled_roi_scores, sampled_gt_boxes,
sampled_gt_classes, sampled_gt_indices)
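# Illustrative only (not part of this commit): with mix_gt_boxes=True the
# groundtruth boxes are appended to the proposals and given a score of 1.0, so
# the sampler can select them like any other RoI (mirrors the concat above).
import tensorflow as tf

proposed_boxes = tf.zeros([1, 3, 4])      # 3 proposals
proposed_scores = tf.fill([1, 3], 0.5)
gt_boxes = tf.ones([1, 2, 4])             # 2 groundtruth boxes
boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)            # [1, 5, 4]
scores = tf.concat(
    [proposed_scores, tf.ones_like(gt_boxes[:, :, 0])], axis=1)  # [1, 5]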
class MaskSampler(object):
"""Samples and creates mask training targets."""
...