Commit ea41922c authored by Kai Chen's avatar Kai Chen
Browse files

renaming and refactoring for bbox methods

parent fdb44305
from .geometry import bbox_overlaps from .geometry import bbox_overlaps
from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps, from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
bbox_sampling, sample_positives, sample_negatives) bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
sample_bboxes)
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target from .bbox_target import bbox_target
__all__ = [ __all__ = [
'bbox_overlaps', 'random_choice', 'bbox_assign', 'bbox_overlaps', 'random_choice', 'bbox_assign',
'bbox_assign_wrt_overlaps', 'bbox_sampling', 'sample_positives', 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
'sample_negatives', 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
'bbox_target' 'bbox2result', 'bbox_target'
] ]
...@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps, ...@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps,
pos_iou_thr=0.5, pos_iou_thr=0.5,
neg_iou_thr=0.5, neg_iou_thr=0.5,
min_pos_iou=.0): min_pos_iou=.0):
"""Assign a corresponding gt bbox or background to each proposal/anchor """Assign a corresponding gt bbox or background to each proposal/anchor.
This function assign a gt bbox to every proposal, each proposals will be
This method assign a gt bbox to every proposal, each proposals will be
assigned with -1, 0, or a positive number. -1 means don't care, 0 means assigned with -1, 0, or a positive number. -1 means don't care, 0 means
negative sample, positive number is the index (1-based) of assigned gt. negative sample, positive number is the index (1-based) of assigned gt.
The assignment is done in following steps, the order matters: The assignment is done in following steps, the order matters:
1. assign every anchor to -1 1. assign every anchor to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0 2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each anchor, if the iou with its nearest gt >= pos_iou_thr, 3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox assign it to that bbox
4. for each gt bbox, assign its nearest proposals(may be more than one) 4. for each gt bbox, assign its nearest proposals(may be more than one)
to itself to itself
Args: Args:
overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k) overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
gt_labels(Tensor, optional): shape (k, ) shape(n, k).
pos_iou_thr(float): iou threshold for positive bboxes gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
neg_iou_thr(float or tuple): iou threshold for negative bboxes pos_iou_thr (float): IoU threshold for positive bboxes.
min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
for RPN, it is usually set as 0, for Fast R-CNN, min_pos_iou (float): Minimum IoU for a bbox to be considered as a
it is usually set as pos_iou_thr positive bbox. This argument only affects the 4th step.
Returns: Returns:
tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
max_overlaps), shape (n, )
""" """
num_bboxes, num_gts = overlaps.size(0), overlaps.size(1) num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default # 1. assign -1 by default
...@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps, ...@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps,
return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
"""Balance sampling for positive bboxes/anchors """Balance sampling for positive bboxes/anchors.
1. calculate average positive num for each gt: num_per_gt 1. calculate average positive num for each gt: num_per_gt
2. sample at most num_per_gt positives for each gt 2. sample at most num_per_gt positives for each gt
3. random sampling from rest anchors if not enough fg 3. random sampling from rest anchors if not enough fg
...@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): ...@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
return sampled_inds return sampled_inds
def sample_negatives(assigned_gt_inds, def bbox_sampling_neg(assigned_gt_inds,
num_expected, num_expected,
max_overlaps=None, max_overlaps=None,
balance_thr=0, balance_thr=0,
hard_fraction=0.5): hard_fraction=0.5):
"""Balance sampling for negative bboxes/anchors """Balance sampling for negative bboxes/anchors.
negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr)
and easy(iou < balance_thr), around equal number of bg are sampled Negative samples are split into 2 set: hard (balance_thr <= iou <
from each set. neg_iou_thr) and easy(iou < balance_thr). The sampling ratio is controlled
by `hard_fraction`.
""" """
neg_inds = torch.nonzero(assigned_gt_inds == 0) neg_inds = torch.nonzero(assigned_gt_inds == 0)
if neg_inds.numel() != 0: if neg_inds.numel() != 0:
...@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds, ...@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds,
max_overlaps=None, max_overlaps=None,
neg_balance_thr=0, neg_balance_thr=0,
neg_hard_fraction=0.5): neg_hard_fraction=0.5):
"""Sample positive and negative bboxes given assigned results.
Args:
assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
num_expected (int): Expected total samples (pos and neg).
pos_fraction (float): Positive sample fraction.
neg_pos_ub (float): Negative/Positive upper bound.
pos_balance_sampling(bool): Whether to sample positive samples around
each gt bbox evenly.
max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts.
Used for negative balance sampling only.
neg_balance_thr (float, optional): IoU threshold for simple/hard
negative balance sampling.
neg_hard_fraction (float, optional): Fraction of hard negative samples
for negative balance sampling.
Returns:
tuple[Tensor]: positive bbox indices, negative bbox indices.
"""
num_expected_pos = int(num_expected * pos_fraction) num_expected_pos = int(num_expected * pos_fraction)
pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
pos_balance_sampling) pos_balance_sampling)
# We found that sampled indices have duplicated items occasionally.
# (mab be a bug of PyTorch)
pos_inds = pos_inds.unique() pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel() num_sampled_pos = pos_inds.numel()
num_neg_max = int( num_neg_max = int(
neg_pos_ub * neg_pos_ub *
num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
max_overlaps, neg_balance_thr, max_overlaps, neg_balance_thr,
neg_hard_fraction) neg_hard_fraction)
neg_inds = neg_inds.unique() neg_inds = neg_inds.unique()
return pos_inds, neg_inds return pos_inds, neg_inds
def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
"""Sample positive and negative bboxes.
This is a simple implementation of bbox sampling given candidates and
ground truth bboxes, which includes 3 steps.
1. Assign gt to each bbox.
2. Add gt bboxes to the sampling pool (optional).
3. Perform positive and negative sampling.
Args:
bboxes (Tensor): Boxes to be sampled from.
gt_bboxes (Tensor): Ground truth bboxes.
gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
`crowd` bboxes are considered as ignored.
gt_labels (Tensor): Class labels of ground truth bboxes.
cfg (dict): Sampling configs.
Returns:
tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
pos_gt_bboxes, pos_gt_labels
"""
bboxes = bboxes[:, :4]
assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
cfg.crowd_thr)
if cfg.add_gt_as_proposals:
bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
gt_assign_self = torch.arange(
1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
assigned_labels = torch.cat([gt_labels, assigned_labels])
pos_inds, neg_inds = bbox_sampling(
assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
pos_bboxes = bboxes[pos_inds]
neg_bboxes = bboxes[neg_inds]
pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
pos_gt_labels = assigned_labels[pos_inds]
return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
pos_gt_labels)
...@@ -4,7 +4,7 @@ import torch.nn as nn ...@@ -4,7 +4,7 @@ import torch.nn as nn
from .base import BaseDetector from .base import BaseDetector
from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
from .. import builder from .. import builder
from mmdet.core import bbox2roi, bbox2result, multi_apply from mmdet.core import sample_bboxes, bbox2roi, bbox2result, multi_apply
class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
...@@ -97,13 +97,14 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, ...@@ -97,13 +97,14 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
proposal_list = proposals proposal_list = proposals
if self.with_bbox: if self.with_bbox:
rcnn_train_cfg_list = [
self.train_cfg.rcnn for _ in range(len(proposal_list))
]
(pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes, (pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes,
pos_gt_labels) = multi_apply( pos_gt_labels) = multi_apply(
self.bbox_roi_extractor.sample_proposals, proposal_list, sample_bboxes,
gt_bboxes, gt_bboxes_ignore, gt_labels, rcnn_train_cfg_list) proposal_list,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
cfg=self.train_cfg.rcnn)
(labels, label_weights, bbox_targets, (labels, label_weights, bbox_targets,
bbox_weights) = self.bbox_head.get_bbox_target( bbox_weights) = self.bbox_head.get_bbox_target(
pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels, pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
......
from .single_level import SingleLevelRoI from .single_level import SingleRoIExtractor
__all__ = ['SingleLevelRoI'] __all__ = ['SingleRoIExtractor']
...@@ -4,19 +4,27 @@ import torch ...@@ -4,19 +4,27 @@ import torch
import torch.nn as nn import torch.nn as nn
from mmdet import ops from mmdet import ops
from mmdet.core import bbox_assign, bbox_sampling
class SingleLevelRoI(nn.Module): class SingleRoIExtractor(nn.Module):
"""Extract RoI features from a single level feature map. Each RoI is """Extract RoI features from a single level feature map.
mapped to a level according to its scale."""
If there are mulitple input feature levels, each RoI is mapped to a level
according to its scale.
Args:
roi_layer (dict): Specify RoI layer type and arguments.
out_channels (int): Output channels of RoI layers.
featmap_strides (int): Strides of input feature maps.
finest_scale (int): Scale threshold of mapping to level 0.
"""
def __init__(self, def __init__(self,
roi_layer, roi_layer,
out_channels, out_channels,
featmap_strides, featmap_strides,
finest_scale=56): finest_scale=56):
super(SingleLevelRoI, self).__init__() super(SingleRoIExtractor, self).__init__()
self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
self.out_channels = out_channels self.out_channels = out_channels
self.featmap_strides = featmap_strides self.featmap_strides = featmap_strides
...@@ -24,6 +32,7 @@ class SingleLevelRoI(nn.Module): ...@@ -24,6 +32,7 @@ class SingleLevelRoI(nn.Module):
@property @property
def num_inputs(self): def num_inputs(self):
"""int: Input feature map levels."""
return len(self.featmap_strides) return len(self.featmap_strides)
def init_weights(self): def init_weights(self):
...@@ -39,12 +48,19 @@ class SingleLevelRoI(nn.Module): ...@@ -39,12 +48,19 @@ class SingleLevelRoI(nn.Module):
return roi_layers return roi_layers
def map_roi_levels(self, rois, num_levels): def map_roi_levels(self, rois, num_levels):
"""Map rois to corresponding feature levels (0-based) by scales. """Map rois to corresponding feature levels by scales.
- scale < finest_scale: level 0 - scale < finest_scale: level 0
- finest_scale <= scale < finest_scale * 2: level 1 - finest_scale <= scale < finest_scale * 2: level 1
- finest_scale * 2 <= scale < finest_scale * 4: level 2 - finest_scale * 2 <= scale < finest_scale * 4: level 2
- scale >= finest_scale * 4: level 3 - scale >= finest_scale * 4: level 3
Args:
rois (Tensor): Input RoIs, shape (k, 5).
num_levels (int): Total level number.
Returns:
Tensor: Level index (0-based) of each RoI, shape (k, )
""" """
scale = torch.sqrt( scale = torch.sqrt(
(rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
...@@ -52,43 +68,7 @@ class SingleLevelRoI(nn.Module): ...@@ -52,43 +68,7 @@ class SingleLevelRoI(nn.Module):
target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
return target_lvls return target_lvls
def sample_proposals(self, proposals, gt_bboxes, gt_bboxes_ignore,
gt_labels, cfg):
proposals = proposals[:, :4]
assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
bbox_assign(proposals, gt_bboxes, gt_bboxes_ignore, gt_labels,
cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
cfg.crowd_thr)
if cfg.add_gt_as_proposals:
proposals = torch.cat([gt_bboxes, proposals], dim=0)
gt_assign_self = torch.arange(
1,
len(gt_labels) + 1,
dtype=torch.long,
device=proposals.device)
assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
assigned_labels = torch.cat([gt_labels, assigned_labels])
pos_inds, neg_inds = bbox_sampling(
assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction,
cfg.neg_pos_ub, cfg.pos_balance_sampling, max_overlaps,
cfg.neg_balance_thr)
pos_proposals = proposals[pos_inds]
neg_proposals = proposals[neg_inds]
pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
pos_gt_labels = assigned_labels[pos_inds]
return (pos_proposals, neg_proposals, pos_assigned_gt_inds,
pos_gt_bboxes, pos_gt_labels)
def forward(self, feats, rois): def forward(self, feats, rois):
"""Extract roi features with the roi layer. If multiple feature levels
are used, then rois are mapped to corresponding levels according to
their scales.
"""
if len(feats) == 1: if len(feats) == 1:
return self.roi_layers[0](feats[0], rois) return self.roi_layers[0](feats[0], rois)
......
...@@ -25,7 +25,7 @@ model = dict( ...@@ -25,7 +25,7 @@ model = dict(
target_stds=[1.0, 1.0, 1.0, 1.0], target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True), use_sigmoid_cls=True),
bbox_roi_extractor=dict( bbox_roi_extractor=dict(
type='SingleLevelRoI', type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256, out_channels=256,
featmap_strides=[4, 8, 16, 32]), featmap_strides=[4, 8, 16, 32]),
......
...@@ -25,7 +25,7 @@ model = dict( ...@@ -25,7 +25,7 @@ model = dict(
target_stds=[1.0, 1.0, 1.0, 1.0], target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True), use_sigmoid_cls=True),
bbox_roi_extractor=dict( bbox_roi_extractor=dict(
type='SingleLevelRoI', type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256, out_channels=256,
featmap_strides=[4, 8, 16, 32]), featmap_strides=[4, 8, 16, 32]),
...@@ -40,7 +40,7 @@ model = dict( ...@@ -40,7 +40,7 @@ model = dict(
target_stds=[0.1, 0.1, 0.2, 0.2], target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False), reg_class_agnostic=False),
mask_roi_extractor=dict( mask_roi_extractor=dict(
type='SingleLevelRoI', type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256, out_channels=256,
featmap_strides=[4, 8, 16, 32]), featmap_strides=[4, 8, 16, 32]),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment