"vscode:/vscode.git/clone" did not exist on "21478411ef564068d447163073a78c01055438fa"
Commit 57f6da5c authored by bailuo

readme

from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
from .inference import (async_inference_detector, inference_detector,
init_detector, show_result, show_result_pyplot, show_result_ins)
from .train import get_root_logger, set_random_seed, train_detector
__all__ = [
'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
'async_inference_detector', 'inference_detector', 'show_result',
'show_result_pyplot', 'show_result_ins'
]
import warnings
import matplotlib.pyplot as plt
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
import torch
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint
from mmdet.core import get_classes
from mmdet.datasets.pipelines import Compose
from mmdet.models import build_detector
import cv2
from scipy import ndimage
def init_detector(config, checkpoint=None, device='cuda:0'):
"""Initialize a detector from config file.
Args:
config (str or :obj:`mmcv.Config`): Config file path or the config
object.
        checkpoint (str, optional): Checkpoint path. If left as None, the model
            will not load any weights.
        device (str): Device to place the model on, e.g. 'cuda:0'.
Returns:
nn.Module: The constructed detector.
"""
if isinstance(config, str):
config = mmcv.Config.fromfile(config)
elif not isinstance(config, mmcv.Config):
raise TypeError('config must be a filename or Config object, '
'but got {}'.format(type(config)))
config.model.pretrained = None
model = build_detector(config.model, test_cfg=config.test_cfg)
if checkpoint is not None:
checkpoint = load_checkpoint(model, checkpoint)
        if 'meta' in checkpoint and 'CLASSES' in checkpoint['meta']:
model.CLASSES = checkpoint['meta']['CLASSES']
else:
warnings.warn('Class names are not saved in the checkpoint\'s '
'meta data, use COCO classes by default.')
model.CLASSES = get_classes('coco')
model.cfg = config # save the config in the model for convenience
model.to(device)
model.eval()
return model
class LoadImage(object):
    """Simple test-time pipeline stage that loads an image from a path or
    passes a pre-loaded array through."""

    def __call__(self, results):
if isinstance(results['img'], str):
results['filename'] = results['img']
else:
results['filename'] = None
img = mmcv.imread(results['img'])
results['img'] = img
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
return results
def inference_detector(model, img):
"""Inference image(s) with the detector.
Args:
model (nn.Module): The loaded detector.
imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
images.
Returns:
If imgs is a str, a generator will be returned, otherwise return the
detection results directly.
"""
cfg = model.cfg
device = next(model.parameters()).device # model device
# build the data pipeline
test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
test_pipeline = Compose(test_pipeline)
# prepare data
data = dict(img=img)
data = test_pipeline(data)
data = scatter(collate([data], samples_per_gpu=1), [device])[0]
# forward the model
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
return result
async def async_inference_detector(model, img):
"""Async inference image(s) with the detector.
Args:
model (nn.Module): The loaded detector.
imgs (str/ndarray or list[str/ndarray]): Either image files or loaded
images.
Returns:
Awaitable detection results.
"""
cfg = model.cfg
device = next(model.parameters()).device # model device
# build the data pipeline
test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
test_pipeline = Compose(test_pipeline)
# prepare data
data = dict(img=img)
data = test_pipeline(data)
data = scatter(collate([data], samples_per_gpu=1), [device])[0]
# We don't restore `torch.is_grad_enabled()` value during concurrent
# inference since execution can overlap
torch.set_grad_enabled(False)
result = await model.aforward_test(rescale=True, **data)
return result
# TODO: merge this method with the one in BaseDetector
def show_result(img,
result,
class_names,
score_thr=0.3,
wait_time=0,
show=True,
out_file=None):
"""Visualize the detection results on the image.
Args:
img (str or np.ndarray): Image filename or loaded image.
result (tuple[list] or list): The detection result, can be either
(bbox, segm) or just bbox.
class_names (list[str] or tuple[str]): A list of class names.
score_thr (float): The threshold to visualize the bboxes and masks.
wait_time (int): Value of waitKey param.
show (bool, optional): Whether to show the image with opencv or not.
out_file (str, optional): If specified, the visualization result will
be written to the out file instead of shown in a window.
Returns:
np.ndarray or None: If neither `show` nor `out_file` is specified, the
visualized image is returned, otherwise None is returned.
"""
assert isinstance(class_names, (tuple, list))
img = mmcv.imread(img)
img = img.copy()
if isinstance(result, tuple):
bbox_result, segm_result = result
else:
bbox_result, segm_result = result, None
bboxes = np.vstack(bbox_result)
labels = [
np.full(bbox.shape[0], i, dtype=np.int32)
for i, bbox in enumerate(bbox_result)
]
labels = np.concatenate(labels)
# draw segmentation masks
if segm_result is not None:
segms = mmcv.concat_list(segm_result)
inds = np.where(bboxes[:, -1] > score_thr)[0]
np.random.seed(42)
color_masks = [
np.random.randint(0, 256, (1, 3), dtype=np.uint8)
for _ in range(max(labels) + 1)
]
for i in inds:
i = int(i)
color_mask = color_masks[labels[i]]
            mask = maskUtils.decode(segms[i]).astype(bool)  # np.bool was removed in NumPy 1.24
img[mask] = img[mask] * 0.5 + color_mask * 0.5
# draw bounding boxes
mmcv.imshow_det_bboxes(
img,
bboxes,
labels,
class_names=class_names,
score_thr=score_thr,
show=show,
wait_time=wait_time,
out_file=out_file)
if not (show or out_file):
return img
def show_result_pyplot(img,
result,
class_names,
score_thr=0.3,
fig_size=(15, 10)):
"""Visualize the detection results on the image.
Args:
img (str or np.ndarray): Image filename or loaded image.
result (tuple[list] or list): The detection result, can be either
(bbox, segm) or just bbox.
class_names (list[str] or tuple[str]): A list of class names.
score_thr (float): The threshold to visualize the bboxes and masks.
fig_size (tuple): Figure size of the pyplot figure.
"""
img = show_result(
img, result, class_names, score_thr=score_thr, show=False)
plt.figure(figsize=fig_size)
    plt.imshow(mmcv.bgr2rgb(img))
    plt.show()  # needed outside notebook environments for the figure to appear
def show_result_ins(img,
result,
class_names,
score_thr=0.3,
sort_by_density=False,
out_file=None):
"""Visualize the instance segmentation results on the image.
Args:
img (str or np.ndarray): Image filename or loaded image.
result (tuple[list] or list): The instance segmentation result.
class_names (list[str] or tuple[str]): A list of class names.
score_thr (float): The threshold to visualize the masks.
        sort_by_density (bool): Whether to sort the masks by area so that
            larger masks are drawn first and smaller ones remain visible.
out_file (str, optional): If specified, the visualization result will
be written to the out file instead of shown in a window.
    Returns:
        np.ndarray or None: If `out_file` is not specified, the visualized
            image is returned, otherwise None is returned.
"""
assert isinstance(class_names, (tuple, list))
img = mmcv.imread(img)
img_show = img.copy()
h, w, _ = img.shape
if not result or result == [None]:
return img_show
cur_result = result[0]
seg_label = cur_result[0]
seg_label = seg_label.cpu().numpy().astype(np.uint8)
cate_label = cur_result[1]
cate_label = cate_label.cpu().numpy()
score = cur_result[2].cpu().numpy()
vis_inds = score > score_thr
seg_label = seg_label[vis_inds]
num_mask = seg_label.shape[0]
cate_label = cate_label[vis_inds]
cate_score = score[vis_inds]
if sort_by_density:
mask_density = []
for idx in range(num_mask):
cur_mask = seg_label[idx, :, :]
cur_mask = mmcv.imresize(cur_mask, (w, h))
cur_mask = (cur_mask > 0.5).astype(np.int32)
mask_density.append(cur_mask.sum())
orders = np.argsort(mask_density)
seg_label = seg_label[orders]
cate_label = cate_label[orders]
cate_score = cate_score[orders]
np.random.seed(42)
color_masks = [
np.random.randint(0, 256, (1, 3), dtype=np.uint8)
for _ in range(num_mask)
]
for idx in range(num_mask):
idx = -(idx+1)
cur_mask = seg_label[idx, :, :]
cur_mask = mmcv.imresize(cur_mask, (w, h))
cur_mask = (cur_mask > 0.5).astype(np.uint8)
if cur_mask.sum() == 0:
continue
color_mask = color_masks[idx]
        cur_mask_bool = cur_mask.astype(bool)  # np.bool was removed in NumPy 1.24
img_show[cur_mask_bool] = img[cur_mask_bool] * 0.5 + color_mask * 0.5
cur_cate = cate_label[idx]
cur_score = cate_score[idx]
label_text = class_names[cur_cate]
#label_text += '|{:.02f}'.format(cur_score)
        center_y, center_x = ndimage.center_of_mass(cur_mask)  # the `measurements` namespace is deprecated in SciPy
vis_pos = (max(int(center_x) - 10, 0), int(center_y))
        cv2.putText(img_show, label_text, vis_pos,
                    cv2.FONT_HERSHEY_COMPLEX, 0.3, (255, 255, 255))  # white text
if out_file is None:
return img_show
else:
mmcv.imwrite(img_show, out_file)
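# Illustrative usage sketch (not part of the original module; the config,
# checkpoint and image paths are hypothetical placeholders): how the APIs
# above compose for single-image instance segmentation.
def _demo_inference(config_file, checkpoint_file, image_file):
    model = init_detector(config_file, checkpoint_file, device='cuda:0')
    result = inference_detector(model, image_file)
    # out_file is None, so the visualized image is returned
    return show_result_ins(image_file, result, model.CLASSES, score_thr=0.25)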
import random
import re
from collections import OrderedDict
import numpy as np
import torch
import torch.distributed as dist
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner, obj_from_dict
from mmdet import datasets
from mmdet.core import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
DistEvalmAPHook, DistOptimizerHook, Fp16OptimizerHook)
from mmdet.datasets import DATASETS, build_dataloader
from mmdet.models import RPN
from mmdet.utils import get_root_logger
def set_random_seed(seed, deterministic=False):
"""Set random seed.
Args:
seed (int): Seed to be used.
deterministic (bool): Whether to set the deterministic option for
CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
to True and `torch.backends.cudnn.benchmark` to False.
Default: False.
"""
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if deterministic:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
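# Minimal usage sketch (illustrative): seed everything once before building
# the model and dataloaders; deterministic=True trades speed for
# reproducible CUDNN kernels.
# >>> set_random_seed(0, deterministic=True)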
def parse_losses(losses):
log_vars = OrderedDict()
for loss_name, loss_value in losses.items():
if isinstance(loss_value, torch.Tensor):
log_vars[loss_name] = loss_value.mean()
elif isinstance(loss_value, list):
log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
else:
raise TypeError(
'{} is not a tensor or list of tensors'.format(loss_name))
loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
log_vars['loss'] = loss
for loss_name, loss_value in log_vars.items():
# reduce loss when distributed training
if dist.is_available() and dist.is_initialized():
loss_value = loss_value.data.clone()
dist.all_reduce(loss_value.div_(dist.get_world_size()))
log_vars[loss_name] = loss_value.item()
return loss, log_vars
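# Illustrative sketch (hypothetical values): how parse_losses reduces a loss
# dict. Keys containing 'loss' are summed into the total, and every entry is
# reduced to a python float in log_vars.
def _demo_parse_losses():
    losses = dict(
        loss_cls=torch.tensor([0.9, 1.1]),  # tensor -> mean -> 1.0
        loss_bbox=[torch.tensor(0.2), torch.tensor(0.4)],  # list -> sum of means -> 0.6
        acc=torch.tensor(0.75))  # logged, but excluded from the total loss
    loss, log_vars = parse_losses(losses)
    # loss is about 1.6 (loss_cls + loss_bbox); log_vars['acc'] == 0.75
    return loss, log_vars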
def batch_processor(model, data, train_mode):
"""Process a data batch.
This method is required as an argument of Runner, which defines how to
process a data batch and obtain proper outputs. The first 3 arguments of
batch_processor are fixed.
Args:
model (nn.Module): A PyTorch model.
data (dict): The data batch in a dict.
train_mode (bool): Training mode or not. It may be useless for some
models.
Returns:
dict: A dict containing losses and log vars.
"""
losses = model(**data)
loss, log_vars = parse_losses(losses)
outputs = dict(
loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
return outputs
def train_detector(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None):
logger = get_root_logger(cfg.log_level)
# start training
if distributed:
_dist_train(
model,
dataset,
cfg,
validate=validate,
logger=logger,
timestamp=timestamp)
else:
_non_dist_train(
model,
dataset,
cfg,
validate=validate,
logger=logger,
timestamp=timestamp)
def build_optimizer(model, optimizer_cfg):
"""Build optimizer from configs.
Args:
model (:obj:`nn.Module`): The model with parameters to be optimized.
optimizer_cfg (dict): The config dict of the optimizer.
Positional fields are:
- type: class name of the optimizer.
- lr: base learning rate.
Optional fields are:
- any arguments of the corresponding optimizer type, e.g.,
weight_decay, momentum, etc.
                - paramwise_options: a dict with 3 accepted fields
                    (bias_lr_mult, bias_decay_mult, norm_decay_mult).
`bias_lr_mult` and `bias_decay_mult` will be multiplied to
the lr and weight decay respectively for all bias parameters
(except for the normalization layers), and
`norm_decay_mult` will be multiplied to the weight decay
for all weight and bias parameters of normalization layers.
Returns:
torch.optim.Optimizer: The initialized optimizer.
Example:
>>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        ...                      weight_decay=0.0001)
>>> optimizer = build_optimizer(model, optimizer_cfg)
"""
if hasattr(model, 'module'):
model = model.module
optimizer_cfg = optimizer_cfg.copy()
paramwise_options = optimizer_cfg.pop('paramwise_options', None)
# if no paramwise option is specified, just use the global setting
if paramwise_options is None:
return obj_from_dict(optimizer_cfg, torch.optim,
dict(params=model.parameters()))
else:
assert isinstance(paramwise_options, dict)
# get base lr and weight decay
base_lr = optimizer_cfg['lr']
base_wd = optimizer_cfg.get('weight_decay', None)
# weight_decay must be explicitly specified if mult is specified
if ('bias_decay_mult' in paramwise_options
or 'norm_decay_mult' in paramwise_options):
assert base_wd is not None
# get param-wise options
bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.)
bias_decay_mult = paramwise_options.get('bias_decay_mult', 1.)
norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.)
# set param-wise lr and weight decay
params = []
for name, param in model.named_parameters():
param_group = {'params': [param]}
if not param.requires_grad:
# FP16 training needs to copy gradient/weight between master
# weight copy and model weight, it is convenient to keep all
# parameters here to align with model.parameters()
params.append(param_group)
continue
# for norm layers, overwrite the weight decay of weight and bias
# TODO: obtain the norm layer prefixes dynamically
            if re.search(r'(bn|gn)(\d+)?\.(weight|bias)', name):  # escape the dot so it only matches a literal '.'
if base_wd is not None:
param_group['weight_decay'] = base_wd * norm_decay_mult
# for other layers, overwrite both lr and weight decay of bias
elif name.endswith('.bias'):
param_group['lr'] = base_lr * bias_lr_mult
if base_wd is not None:
param_group['weight_decay'] = base_wd * bias_decay_mult
# otherwise use the global settings
params.append(param_group)
optimizer_cls = getattr(torch.optim, optimizer_cfg.pop('type'))
return optimizer_cls(params, **optimizer_cfg)
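# Illustrative sketch (hypothetical values): a config exercising the
# paramwise_options documented above. Biases get 2x the base lr and no
# weight decay; norm-layer weights and biases get no weight decay either.
def _demo_build_optimizer(model):
    optimizer_cfg = dict(
        type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001,
        paramwise_options=dict(
            bias_lr_mult=2., bias_decay_mult=0., norm_decay_mult=0.))
    return build_optimizer(model, optimizer_cfg)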
def _dist_train(model,
dataset,
cfg,
validate=False,
logger=None,
timestamp=None):
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
data_loaders = [
build_dataloader(
ds, cfg.data.imgs_per_gpu, cfg.data.workers_per_gpu, dist=True)
for ds in dataset
]
# put model on gpus
model = MMDistributedDataParallel(model.cuda())
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
runner = Runner(
model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly workaround to make the .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config,
**fp16_cfg)
else:
optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
# register hooks
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config)
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if validate:
val_dataset_cfg = cfg.data.val
eval_cfg = cfg.get('evaluation', {})
if isinstance(model.module, RPN):
# TODO: implement recall hooks for other datasets
runner.register_hook(
CocoDistEvalRecallHook(val_dataset_cfg, **eval_cfg))
else:
dataset_type = DATASETS.get(val_dataset_cfg.type)
if issubclass(dataset_type, datasets.CocoDataset):
runner.register_hook(
CocoDistEvalmAPHook(val_dataset_cfg, **eval_cfg))
else:
runner.register_hook(
DistEvalmAPHook(val_dataset_cfg, **eval_cfg))
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_train(model,
dataset,
cfg,
validate=False,
logger=None,
timestamp=None):
if validate:
        raise NotImplementedError('Built-in validation is not implemented '
                                  'yet in non-distributed training. Use '
                                  'distributed training or the test.py and '
                                  '*eval.py scripts instead.')
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
data_loaders = [
build_dataloader(
ds,
cfg.data.imgs_per_gpu,
cfg.data.workers_per_gpu,
cfg.gpus,
dist=False) for ds in dataset
]
# put model on gpus
model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
runner = Runner(
model, batch_processor, optimizer, cfg.work_dir, logger=logger)
    # an ugly workaround to make the .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(
**cfg.optimizer_config, **fp16_cfg, distributed=False)
else:
optimizer_config = cfg.optimizer_config
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config)
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
from .anchor import * # noqa: F401, F403
from .bbox import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .fp16 import * # noqa: F401, F403
from .mask import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .anchor_generator import AnchorGenerator
from .anchor_target import (anchor_inside_flags, anchor_target,
images_to_levels, unmap)
from .guided_anchor_target import ga_loc_target, ga_shape_target
from .point_generator import PointGenerator
from .point_target import point_target
__all__ = [
'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target',
'ga_shape_target', 'PointGenerator', 'point_target', 'images_to_levels',
'unmap'
]
import torch
class AnchorGenerator(object):
"""
Examples:
>>> from mmdet.core import AnchorGenerator
>>> self = AnchorGenerator(9, [1.], [1.])
>>> all_anchors = self.grid_anchors((2, 2), device='cpu')
>>> print(all_anchors)
tensor([[ 0., 0., 8., 8.],
[16., 0., 24., 8.],
[ 0., 16., 8., 24.],
[16., 16., 24., 24.]])
"""
def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
self.base_size = base_size
self.scales = torch.Tensor(scales)
self.ratios = torch.Tensor(ratios)
self.scale_major = scale_major
self.ctr = ctr
self.base_anchors = self.gen_base_anchors()
@property
def num_base_anchors(self):
return self.base_anchors.size(0)
def gen_base_anchors(self):
w = self.base_size
h = self.base_size
if self.ctr is None:
x_ctr = 0.5 * (w - 1)
y_ctr = 0.5 * (h - 1)
else:
x_ctr, y_ctr = self.ctr
h_ratios = torch.sqrt(self.ratios)
w_ratios = 1 / h_ratios
if self.scale_major:
ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
else:
ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
# yapf: disable
base_anchors = torch.stack(
[
x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
],
dim=-1).round()
# yapf: enable
return base_anchors
def _meshgrid(self, x, y, row_major=True):
xx = x.repeat(len(y))
yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
if row_major:
return xx, yy
else:
return yy, xx
def grid_anchors(self, featmap_size, stride=16, device='cuda'):
base_anchors = self.base_anchors.to(device)
feat_h, feat_w = featmap_size
shift_x = torch.arange(0, feat_w, device=device) * stride
shift_y = torch.arange(0, feat_h, device=device) * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
shifts = shifts.type_as(base_anchors)
# first feat_w elements correspond to the first row of shifts
# add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
# shifted anchors (K, A, 4), reshape to (K*A, 4)
all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
all_anchors = all_anchors.view(-1, 4)
# first A rows correspond to A anchors of (0, 0) in feature map,
# then (0, 1), (0, 2), ...
return all_anchors
def valid_flags(self, featmap_size, valid_size, device='cuda'):
feat_h, feat_w = featmap_size
valid_h, valid_w = valid_size
assert valid_h <= feat_h and valid_w <= feat_w
valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
valid_x[:valid_w] = 1
valid_y[:valid_h] = 1
valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
valid = valid_xx & valid_yy
valid = valid[:,
None].expand(valid.size(0),
self.num_base_anchors).contiguous().view(-1)
return valid
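# Illustrative sketch (hypothetical sizes and strides): multi-ratio anchors
# on a small feature map, plus the valid flags for a padded input.
def _demo_anchor_generator():
    gen = AnchorGenerator(base_size=8, scales=[1.], ratios=[0.5, 1.0, 2.0])
    anchors = gen.grid_anchors((4, 4), stride=8, device='cpu')  # (4*4*3, 4)
    # only the top 3 rows of the 4x4 map correspond to unpadded input
    flags = gen.valid_flags((4, 4), valid_size=(3, 4), device='cpu')
    return anchors, flags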
import torch
from ..bbox import PseudoSampler, assign_and_sample, bbox2delta, build_assigner
from ..utils import multi_apply
def anchor_target(anchor_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
target_means,
target_stds,
cfg,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
sampling=True,
unmap_outputs=True):
"""Compute regression and classification targets for anchors.
Args:
anchor_list (list[list]): Multi level anchors of each image.
valid_flag_list (list[list]): Multi level valid flags of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
target_means (Iterable): Mean value of regression targets.
target_stds (Iterable): Std value of regression targets.
cfg (dict): RPN train configs.
Returns:
tuple
"""
num_imgs = len(img_metas)
assert len(anchor_list) == len(valid_flag_list) == num_imgs
# anchor number of multi levels
num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
# concat all level anchors and flags to a single tensor
for i in range(num_imgs):
assert len(anchor_list[i]) == len(valid_flag_list[i])
anchor_list[i] = torch.cat(anchor_list[i])
valid_flag_list[i] = torch.cat(valid_flag_list[i])
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
if gt_labels_list is None:
gt_labels_list = [None for _ in range(num_imgs)]
(all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
pos_inds_list, neg_inds_list) = multi_apply(
anchor_target_single,
anchor_list,
valid_flag_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
gt_labels_list,
img_metas,
target_means=target_means,
target_stds=target_stds,
cfg=cfg,
label_channels=label_channels,
sampling=sampling,
unmap_outputs=unmap_outputs)
# no valid anchors
if any([labels is None for labels in all_labels]):
return None
# sampled anchors of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
# split targets to a list w.r.t. multiple levels
labels_list = images_to_levels(all_labels, num_level_anchors)
label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
return (labels_list, label_weights_list, bbox_targets_list,
bbox_weights_list, num_total_pos, num_total_neg)
def images_to_levels(target, num_level_anchors):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
for n in num_level_anchors:
end = start + n
level_targets.append(target[:, start:end].squeeze(0))
start = end
return level_targets
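# Illustrative sketch: two images with 5 anchors each (3 on level 0, 2 on
# level 1) are regrouped into one tensor per level.
# >>> flat = [torch.arange(5), torch.arange(5, 10)]
# >>> images_to_levels(flat, [3, 2])
# [tensor([[0, 1, 2], [5, 6, 7]]), tensor([[3, 4], [8, 9]])]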
def anchor_target_single(flat_anchors,
valid_flags,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
img_meta,
target_means,
target_stds,
cfg,
label_channels=1,
sampling=True,
unmap_outputs=True):
inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
img_meta['img_shape'][:2],
cfg.allowed_border)
if not inside_flags.any():
return (None, ) * 6
# assign gt and sample anchors
anchors = flat_anchors[inside_flags, :]
if sampling:
assign_result, sampling_result = assign_and_sample(
anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
else:
bbox_assigner = build_assigner(cfg.assigner)
assign_result = bbox_assigner.assign(anchors, gt_bboxes,
gt_bboxes_ignore, gt_labels)
bbox_sampler = PseudoSampler()
sampling_result = bbox_sampler.sample(assign_result, anchors,
gt_bboxes)
num_valid_anchors = anchors.shape[0]
bbox_targets = torch.zeros_like(anchors)
bbox_weights = torch.zeros_like(anchors)
labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
if len(pos_inds) > 0:
pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
sampling_result.pos_gt_bboxes,
target_means, target_stds)
bbox_targets[pos_inds, :] = pos_bbox_targets
bbox_weights[pos_inds, :] = 1.0
if gt_labels is None:
labels[pos_inds] = 1
else:
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
if cfg.pos_weight <= 0:
label_weights[pos_inds] = 1.0
else:
label_weights[pos_inds] = cfg.pos_weight
if len(neg_inds) > 0:
label_weights[neg_inds] = 1.0
# map up to original set of anchors
if unmap_outputs:
num_total_anchors = flat_anchors.size(0)
labels = unmap(labels, num_total_anchors, inside_flags)
label_weights = unmap(label_weights, num_total_anchors, inside_flags)
bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
neg_inds)
def anchor_inside_flags(flat_anchors,
valid_flags,
img_shape,
allowed_border=0):
img_h, img_w = img_shape[:2]
if allowed_border >= 0:
inside_flags = valid_flags & \
(flat_anchors[:, 0] >= -allowed_border).type(torch.uint8) & \
(flat_anchors[:, 1] >= -allowed_border).type(torch.uint8) & \
(flat_anchors[:, 2] < img_w + allowed_border).type(torch.uint8) & \
(flat_anchors[:, 3] < img_h + allowed_border).type(torch.uint8)
else:
inside_flags = valid_flags
return inside_flags
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
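# Illustrative sketch: unmap scatters values computed for the inside anchors
# back to the full anchor set, filling the remaining slots with 0.
# >>> inside = torch.tensor([1, 0, 0, 1, 0], dtype=torch.bool)
# >>> unmap(torch.tensor([7, 9]), 5, inside)
# tensor([7, 0, 0, 9, 0])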
import torch
from ..bbox import PseudoSampler, build_assigner, build_sampler
from ..utils import multi_apply, unmap
def calc_region(bbox, ratio, featmap_size=None):
"""Calculate a proportional bbox region.
    The bbox center is fixed, while the new h' and w' are h * ratio and w * ratio.
Args:
bbox (Tensor): Bboxes to calculate regions, shape (n, 4)
ratio (float): Ratio of the output region.
featmap_size (tuple): Feature map size used for clipping the boundary.
Returns:
tuple: x1, y1, x2, y2
"""
x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long()
y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long()
x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long()
y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long()
if featmap_size is not None:
x1 = x1.clamp(min=0, max=featmap_size[1] - 1)
y1 = y1.clamp(min=0, max=featmap_size[0] - 1)
x2 = x2.clamp(min=0, max=featmap_size[1] - 1)
y2 = y2.clamp(min=0, max=featmap_size[0] - 1)
return (x1, y1, x2, y2)
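# Worked example: with ratio r the two corners are interpolated toward each
# other, so r=0.5 collapses a box to its center point. For bbox
# (x1, y1, x2, y2) = (0, 0, 10, 10) and ratio 0.2:
#   x1' = round(0.8 * 0 + 0.2 * 10) = 2,  x2' = round(0.2 * 0 + 0.8 * 10) = 8
# giving the center region (2, 2, 8, 8).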
def ga_loc_target(gt_bboxes_list,
featmap_sizes,
anchor_scale,
anchor_strides,
center_ratio=0.2,
ignore_ratio=0.5):
"""Compute location targets for guided anchoring.
Each feature map is divided into positive, negative and ignore regions.
- positive regions: target 1, weight 1
- ignore regions: target 0, weight 0
- negative regions: target 0, weight 0.1
Args:
gt_bboxes_list (list[Tensor]): Gt bboxes of each image.
featmap_sizes (list[tuple]): Multi level sizes of each feature maps.
anchor_scale (int): Anchor scale.
anchor_strides ([list[int]]): Multi level anchor strides.
center_ratio (float): Ratio of center region.
ignore_ratio (float): Ratio of ignore region.
Returns:
tuple
"""
img_per_gpu = len(gt_bboxes_list)
num_lvls = len(featmap_sizes)
r1 = (1 - center_ratio) / 2
r2 = (1 - ignore_ratio) / 2
all_loc_targets = []
all_loc_weights = []
all_ignore_map = []
for lvl_id in range(num_lvls):
h, w = featmap_sizes[lvl_id]
loc_targets = torch.zeros(
img_per_gpu,
1,
h,
w,
device=gt_bboxes_list[0].device,
dtype=torch.float32)
loc_weights = torch.full_like(loc_targets, -1)
ignore_map = torch.zeros_like(loc_targets)
all_loc_targets.append(loc_targets)
all_loc_weights.append(loc_weights)
all_ignore_map.append(ignore_map)
for img_id in range(img_per_gpu):
gt_bboxes = gt_bboxes_list[img_id]
scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) *
(gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1))
min_anchor_size = scale.new_full(
(1, ), float(anchor_scale * anchor_strides[0]))
# assign gt bboxes to different feature levels w.r.t. their scales
target_lvls = torch.floor(
torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)
target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()
for gt_id in range(gt_bboxes.size(0)):
lvl = target_lvls[gt_id].item()
# rescaled to corresponding feature map
gt_ = gt_bboxes[gt_id, :4] / anchor_strides[lvl]
# calculate ignore regions
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
gt_, r2, featmap_sizes[lvl])
# calculate positive (center) regions
ctr_x1, ctr_y1, ctr_x2, ctr_y2 = calc_region(
gt_, r1, featmap_sizes[lvl])
all_loc_targets[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,
ctr_x1:ctr_x2 + 1] = 1
all_loc_weights[lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
ignore_x1:ignore_x2 + 1] = 0
all_loc_weights[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,
ctr_x1:ctr_x2 + 1] = 1
# calculate ignore map on nearby low level feature
if lvl > 0:
d_lvl = lvl - 1
# rescaled to corresponding feature map
gt_ = gt_bboxes[gt_id, :4] / anchor_strides[d_lvl]
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
gt_, r2, featmap_sizes[d_lvl])
all_ignore_map[d_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
ignore_x1:ignore_x2 + 1] = 1
# calculate ignore map on nearby high level feature
if lvl < num_lvls - 1:
u_lvl = lvl + 1
# rescaled to corresponding feature map
gt_ = gt_bboxes[gt_id, :4] / anchor_strides[u_lvl]
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(
gt_, r2, featmap_sizes[u_lvl])
all_ignore_map[u_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,
ignore_x1:ignore_x2 + 1] = 1
for lvl_id in range(num_lvls):
# ignore negative regions w.r.t. ignore map
all_loc_weights[lvl_id][(all_loc_weights[lvl_id] < 0)
& (all_ignore_map[lvl_id] > 0)] = 0
# set negative regions with weight 0.1
all_loc_weights[lvl_id][all_loc_weights[lvl_id] < 0] = 0.1
# loc average factor to balance loss
loc_avg_factor = sum(
[t.size(0) * t.size(-1) * t.size(-2) for t in all_loc_targets]) / 200
return all_loc_targets, all_loc_weights, loc_avg_factor
def ga_shape_target(approx_list,
inside_flag_list,
square_list,
gt_bboxes_list,
img_metas,
approxs_per_octave,
cfg,
gt_bboxes_ignore_list=None,
sampling=True,
unmap_outputs=True):
"""Compute guided anchoring targets.
Args:
approx_list (list[list]): Multi level approxs of each image.
inside_flag_list (list[list]): Multi level inside flags of each image.
square_list (list[list]): Multi level squares of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
approxs_per_octave (int): number of approxs per octave
cfg (dict): RPN train configs.
gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.
sampling (bool): sampling or not.
unmap_outputs (bool): unmap outputs or not.
Returns:
tuple
"""
num_imgs = len(img_metas)
assert len(approx_list) == len(inside_flag_list) == len(
square_list) == num_imgs
# anchor number of multi levels
num_level_squares = [squares.size(0) for squares in square_list[0]]
# concat all level anchors and flags to a single tensor
inside_flag_flat_list = []
approx_flat_list = []
square_flat_list = []
for i in range(num_imgs):
assert len(square_list[i]) == len(inside_flag_list[i])
inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
approx_flat_list.append(torch.cat(approx_list[i]))
square_flat_list.append(torch.cat(square_list[i]))
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
(all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list,
neg_inds_list) = multi_apply(
ga_shape_target_single,
approx_flat_list,
inside_flag_flat_list,
square_flat_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
img_metas,
approxs_per_octave=approxs_per_octave,
cfg=cfg,
sampling=sampling,
unmap_outputs=unmap_outputs)
# no valid anchors
if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]):
return None
# sampled anchors of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
# split targets to a list w.r.t. multiple levels
bbox_anchors_list = images_to_levels(all_bbox_anchors, num_level_squares)
bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares)
bbox_weights_list = images_to_levels(all_bbox_weights, num_level_squares)
return (bbox_anchors_list, bbox_gts_list, bbox_weights_list, num_total_pos,
num_total_neg)
def images_to_levels(target, num_level_anchors):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
for n in num_level_anchors:
end = start + n
level_targets.append(target[:, start:end].squeeze(0))
start = end
return level_targets
def ga_shape_target_single(flat_approxs,
inside_flags,
flat_squares,
gt_bboxes,
gt_bboxes_ignore,
img_meta,
approxs_per_octave,
cfg,
sampling=True,
unmap_outputs=True):
"""Compute guided anchoring targets.
This function returns sampled anchors and gt bboxes directly
    rather than computing regression targets.
Args:
        flat_approxs (Tensor): flat approxs of a single image,
            shape (approxs_per_octave * n, 4)
        inside_flags (Tensor): inside flags of a single image,
            shape (n, ).
        flat_squares (Tensor): flat squares of a single image,
            shape (n, 4)
gt_bboxes (Tensor): Ground truth bboxes of a single image.
img_meta (dict): Meta info of a single image.
approxs_per_octave (int): number of approxs per octave
cfg (dict): RPN train configs.
sampling (bool): sampling or not.
unmap_outputs (bool): unmap outputs or not.
Returns:
tuple
"""
if not inside_flags.any():
return (None, ) * 5
# assign gt and sample anchors
expand_inside_flags = inside_flags[:, None].expand(
-1, approxs_per_octave).reshape(-1)
approxs = flat_approxs[expand_inside_flags, :]
squares = flat_squares[inside_flags, :]
bbox_assigner = build_assigner(cfg.ga_assigner)
assign_result = bbox_assigner.assign(approxs, squares, approxs_per_octave,
gt_bboxes, gt_bboxes_ignore)
if sampling:
bbox_sampler = build_sampler(cfg.ga_sampler)
else:
bbox_sampler = PseudoSampler()
sampling_result = bbox_sampler.sample(assign_result, squares, gt_bboxes)
bbox_anchors = torch.zeros_like(squares)
bbox_gts = torch.zeros_like(squares)
bbox_weights = torch.zeros_like(squares)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
if len(pos_inds) > 0:
bbox_anchors[pos_inds, :] = sampling_result.pos_bboxes
bbox_gts[pos_inds, :] = sampling_result.pos_gt_bboxes
bbox_weights[pos_inds, :] = 1.0
# map up to original set of anchors
if unmap_outputs:
num_total_anchors = flat_squares.size(0)
bbox_anchors = unmap(bbox_anchors, num_total_anchors, inside_flags)
bbox_gts = unmap(bbox_gts, num_total_anchors, inside_flags)
bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
return (bbox_anchors, bbox_gts, bbox_weights, pos_inds, neg_inds)
import torch
class PointGenerator(object):
def _meshgrid(self, x, y, row_major=True):
xx = x.repeat(len(y))
yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
if row_major:
return xx, yy
else:
return yy, xx
def grid_points(self, featmap_size, stride=16, device='cuda'):
feat_h, feat_w = featmap_size
shift_x = torch.arange(0., feat_w, device=device) * stride
shift_y = torch.arange(0., feat_h, device=device) * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
stride = shift_x.new_full((shift_xx.shape[0], ), stride)
shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
all_points = shifts.to(device)
return all_points
def valid_flags(self, featmap_size, valid_size, device='cuda'):
feat_h, feat_w = featmap_size
valid_h, valid_w = valid_size
assert valid_h <= feat_h and valid_w <= feat_w
valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
valid_x[:valid_w] = 1
valid_y[:valid_h] = 1
valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
valid = valid_xx & valid_yy
return valid
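# Illustrative sketch (hypothetical sizes): points for a 2x3 feature map at
# stride 8; each row is (x, y, stride), laid out row-major like the anchors.
def _demo_point_generator():
    gen = PointGenerator()
    points = gen.grid_points((2, 3), stride=8, device='cpu')  # shape (6, 3)
    return points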
import torch
from ..bbox import PseudoSampler, assign_and_sample, build_assigner
from ..utils import multi_apply
def point_target(proposals_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
cfg,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
sampling=True,
unmap_outputs=True):
"""Compute corresponding GT box and classification targets for proposals.
Args:
        proposals_list (list[list]): Multi level points of each image.
valid_flag_list (list[list]): Multi level valid flags of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
cfg (dict): train sample configs.
Returns:
tuple
"""
num_imgs = len(img_metas)
assert len(proposals_list) == len(valid_flag_list) == num_imgs
# points number of multi levels
num_level_proposals = [points.size(0) for points in proposals_list[0]]
# concat all level points and flags to a single tensor
for i in range(num_imgs):
assert len(proposals_list[i]) == len(valid_flag_list[i])
proposals_list[i] = torch.cat(proposals_list[i])
valid_flag_list[i] = torch.cat(valid_flag_list[i])
# compute targets for each image
if gt_bboxes_ignore_list is None:
gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
if gt_labels_list is None:
gt_labels_list = [None for _ in range(num_imgs)]
(all_labels, all_label_weights, all_bbox_gt, all_proposals,
all_proposal_weights, pos_inds_list, neg_inds_list) = multi_apply(
point_target_single,
proposals_list,
valid_flag_list,
gt_bboxes_list,
gt_bboxes_ignore_list,
gt_labels_list,
cfg=cfg,
label_channels=label_channels,
sampling=sampling,
unmap_outputs=unmap_outputs)
# no valid points
if any([labels is None for labels in all_labels]):
return None
# sampled points of all images
num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
labels_list = images_to_levels(all_labels, num_level_proposals)
label_weights_list = images_to_levels(all_label_weights,
num_level_proposals)
bbox_gt_list = images_to_levels(all_bbox_gt, num_level_proposals)
proposals_list = images_to_levels(all_proposals, num_level_proposals)
proposal_weights_list = images_to_levels(all_proposal_weights,
num_level_proposals)
return (labels_list, label_weights_list, bbox_gt_list, proposals_list,
proposal_weights_list, num_total_pos, num_total_neg)
def images_to_levels(target, num_level_grids):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
for n in num_level_grids:
end = start + n
level_targets.append(target[:, start:end].squeeze(0))
start = end
return level_targets
def point_target_single(flat_proposals,
valid_flags,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
cfg,
label_channels=1,
sampling=True,
unmap_outputs=True):
inside_flags = valid_flags
if not inside_flags.any():
return (None, ) * 7
# assign gt and sample proposals
proposals = flat_proposals[inside_flags, :]
if sampling:
assign_result, sampling_result = assign_and_sample(
proposals, gt_bboxes, gt_bboxes_ignore, None, cfg)
else:
bbox_assigner = build_assigner(cfg.assigner)
assign_result = bbox_assigner.assign(proposals, gt_bboxes,
gt_bboxes_ignore, gt_labels)
bbox_sampler = PseudoSampler()
sampling_result = bbox_sampler.sample(assign_result, proposals,
gt_bboxes)
num_valid_proposals = proposals.shape[0]
bbox_gt = proposals.new_zeros([num_valid_proposals, 4])
pos_proposals = torch.zeros_like(proposals)
proposals_weights = proposals.new_zeros([num_valid_proposals, 4])
labels = proposals.new_zeros(num_valid_proposals, dtype=torch.long)
label_weights = proposals.new_zeros(num_valid_proposals, dtype=torch.float)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
if len(pos_inds) > 0:
pos_gt_bboxes = sampling_result.pos_gt_bboxes
bbox_gt[pos_inds, :] = pos_gt_bboxes
pos_proposals[pos_inds, :] = proposals[pos_inds, :]
proposals_weights[pos_inds, :] = 1.0
if gt_labels is None:
labels[pos_inds] = 1
else:
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
if cfg.pos_weight <= 0:
label_weights[pos_inds] = 1.0
else:
label_weights[pos_inds] = cfg.pos_weight
if len(neg_inds) > 0:
label_weights[neg_inds] = 1.0
# map up to original set of proposals
if unmap_outputs:
num_total_proposals = flat_proposals.size(0)
labels = unmap(labels, num_total_proposals, inside_flags)
label_weights = unmap(label_weights, num_total_proposals, inside_flags)
bbox_gt = unmap(bbox_gt, num_total_proposals, inside_flags)
pos_proposals = unmap(pos_proposals, num_total_proposals, inside_flags)
proposals_weights = unmap(proposals_weights, num_total_proposals,
inside_flags)
return (labels, label_weights, bbox_gt, pos_proposals, proposals_weights,
pos_inds, neg_inds)
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
from .bbox_target import bbox_target
from .geometry import bbox_overlaps
from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip,
bbox_mapping, bbox_mapping_back, delta2bbox,
distance2bbox, roi2bbox)
from .assign_sampling import ( # isort:skip, avoid recursive imports
assign_and_sample, build_assigner, build_sampler)
__all__ = [
'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
'distance2bbox', 'bbox_target'
]
import mmcv
from . import assigners, samplers
def build_assigner(cfg, **kwargs):
if isinstance(cfg, assigners.BaseAssigner):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
else:
        raise TypeError('Invalid type {} for building an assigner'.format(
            type(cfg)))
def build_sampler(cfg, **kwargs):
if isinstance(cfg, samplers.BaseSampler):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
else:
raise TypeError('Invalid type {} for building a sampler'.format(
type(cfg)))
def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
bbox_assigner = build_assigner(cfg.assigner)
bbox_sampler = build_sampler(cfg.sampler)
assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
gt_labels)
sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
gt_labels)
return assign_result, sampling_result
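# Illustrative sketch (hypothetical values mirroring typical RCNN settings):
# the cfg consumed by assign_and_sample carries `assigner` and `sampler`
# dicts whose `type` field names a class in the corresponding module, e.g.
#
#     assigner=dict(type='MaxIoUAssigner', pos_iou_thr=0.5,
#                   neg_iou_thr=0.5, min_pos_iou=0.5),
#     sampler=dict(type='RandomSampler', num=512, pos_fraction=0.25,
#                  neg_pos_ub=-1, add_gt_as_proposals=True)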
from .approx_max_iou_assigner import ApproxMaxIoUAssigner
from .assign_result import AssignResult
from .atss_assigner import ATSSAssigner
from .base_assigner import BaseAssigner
from .max_iou_assigner import MaxIoUAssigner
from .point_assigner import PointAssigner
__all__ = [
'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
'PointAssigner', 'ATSSAssigner'
]
import torch
from ..geometry import bbox_overlaps
from .max_iou_assigner import MaxIoUAssigner
class ApproxMaxIoUAssigner(MaxIoUAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
    Each proposal will be assigned `-1`, `0`, or a positive integer
    indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
gpu_assign_thr (int): The upper bound of the number of GT for GPU
assign. When the number of gt is above this threshold, will assign
on CPU device. Negative values mean not assign on CPU.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
ignore_wrt_candidates=True,
gpu_assign_thr=-1):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
self.gpu_assign_thr = gpu_assign_thr
def assign(self,
approxs,
squares,
approxs_per_octave,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to approxs.
        This method assigns a gt bbox to each group of approxs (bboxes).
        Each group of approxs is represented by a base approx (bbox) and
        will be assigned -1, 0, or a positive number.
        -1 means don't care, 0 means negative sample, and a
        positive number is the index (1-based) of the assigned gt.
        The assignment is done in the following steps, and the order matters.
        1. assign every bbox to -1
        2. use the max IoU of each group of approxs as the IoU for its group
        3. assign proposals whose iou with all gts < neg_iou_thr to 0
        4. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        5. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself
Args:
approxs (Tensor): Bounding boxes to be assigned,
shape(approxs_per_octave*n, 4).
squares (Tensor): Base Bounding boxes to be assigned,
shape(n, 4).
approxs_per_octave (int): number of approxs per octave
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_squares = squares.size(0)
num_gts = gt_bboxes.size(0)
if num_squares == 0 or num_gts == 0:
# No predictions and/or truth, return empty assignment
overlaps = approxs.new(num_gts, num_squares)
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
# re-organize anchors by approxs_per_octave x num_squares
approxs = torch.transpose(
approxs.view(num_squares, approxs_per_octave, 4), 0,
1).contiguous().view(-1, 4)
assign_on_cpu = True if (self.gpu_assign_thr > 0) and (
num_gts > self.gpu_assign_thr) else False
# compute overlap and assign gt on CPU when number of GT is large
if assign_on_cpu:
device = approxs.device
approxs = approxs.cpu()
gt_bboxes = gt_bboxes.cpu()
if gt_bboxes_ignore is not None:
gt_bboxes_ignore = gt_bboxes_ignore.cpu()
if gt_labels is not None:
gt_labels = gt_labels.cpu()
all_overlaps = bbox_overlaps(approxs, gt_bboxes)
overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,
num_gts).max(dim=0)
overlaps = torch.transpose(overlaps, 0, 1)
bboxes = squares[:, :4]
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
if assign_on_cpu:
assign_result.gt_inds = assign_result.gt_inds.to(device)
assign_result.max_overlaps = assign_result.max_overlaps.to(device)
if assign_result.labels is not None:
assign_result.labels = assign_result.labels.to(device)
return assign_result
import torch
from mmdet.utils import util_mixins
class AssignResult(util_mixins.NiceRepr):
"""
Stores assignments between predicted and truth boxes.
Attributes:
num_gts (int): the number of truth boxes considered when computing this
assignment
gt_inds (LongTensor): for each predicted box indicates the 1-based
index of the assigned truth box. 0 means unassigned and -1 means
ignore.
max_overlaps (FloatTensor): the iou between the predicted box and its
assigned truth box.
labels (None | LongTensor): If specified, for each predicted box
indicates the category label of the assigned truth box.
Example:
>>> # An assign result between 4 predicted boxes and 9 true boxes
>>> # where only two boxes were assigned.
>>> num_gts = 9
        >>> max_overlaps = torch.FloatTensor([0, .5, .9, 0])
>>> gt_inds = torch.LongTensor([-1, 1, 2, 0])
>>> labels = torch.LongTensor([0, 3, 4, 0])
>>> self = AssignResult(num_gts, gt_inds, max_overlaps, labels)
>>> print(str(self)) # xdoctest: +IGNORE_WANT
<AssignResult(num_gts=9, gt_inds.shape=(4,), max_overlaps.shape=(4,),
labels.shape=(4,))>
>>> # Force addition of gt labels (when adding gt as proposals)
>>> new_labels = torch.LongTensor([3, 4, 5])
>>> self.add_gt_(new_labels)
>>> print(str(self)) # xdoctest: +IGNORE_WANT
<AssignResult(num_gts=9, gt_inds.shape=(7,), max_overlaps.shape=(7,),
labels.shape=(7,))>
"""
def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
self.num_gts = num_gts
self.gt_inds = gt_inds
self.max_overlaps = max_overlaps
self.labels = labels
@property
def num_preds(self):
"""
Return the number of predictions in this assignment
"""
return len(self.gt_inds)
@property
def info(self):
"""
Returns a dictionary of info about the object
"""
return {
'num_gts': self.num_gts,
'num_preds': self.num_preds,
'gt_inds': self.gt_inds,
'max_overlaps': self.max_overlaps,
'labels': self.labels,
}
def __nice__(self):
"""
Create a "nice" summary string describing this assign result
"""
parts = []
parts.append('num_gts={!r}'.format(self.num_gts))
if self.gt_inds is None:
parts.append('gt_inds={!r}'.format(self.gt_inds))
else:
parts.append('gt_inds.shape={!r}'.format(
tuple(self.gt_inds.shape)))
if self.max_overlaps is None:
parts.append('max_overlaps={!r}'.format(self.max_overlaps))
else:
parts.append('max_overlaps.shape={!r}'.format(
tuple(self.max_overlaps.shape)))
if self.labels is None:
parts.append('labels={!r}'.format(self.labels))
else:
parts.append('labels.shape={!r}'.format(tuple(self.labels.shape)))
return ', '.join(parts)
@classmethod
def random(cls, **kwargs):
"""
Create random AssignResult for tests or debugging.
Kwargs:
num_preds: number of predicted boxes
num_gts: number of true boxes
            p_ignore (float): probability of a predicted box assigned to an
                ignored truth
p_assigned (float): probability of a predicted box not being
assigned
p_use_label (float | bool): with labels or not
rng (None | int | numpy.random.RandomState): seed or state
Returns:
AssignResult :
Example:
>>> from mmdet.core.bbox.assigners.assign_result import * # NOQA
>>> self = AssignResult.random()
>>> print(self.info)
"""
from mmdet.core.bbox import demodata
rng = demodata.ensure_rng(kwargs.get('rng', None))
num_gts = kwargs.get('num_gts', None)
num_preds = kwargs.get('num_preds', None)
p_ignore = kwargs.get('p_ignore', 0.3)
p_assigned = kwargs.get('p_assigned', 0.7)
p_use_label = kwargs.get('p_use_label', 0.5)
        num_classes = kwargs.get('num_classes', 3)
if num_gts is None:
num_gts = rng.randint(0, 8)
if num_preds is None:
num_preds = rng.randint(0, 16)
if num_gts == 0:
max_overlaps = torch.zeros(num_preds, dtype=torch.float32)
gt_inds = torch.zeros(num_preds, dtype=torch.int64)
            if p_use_label is True or rng.rand() < p_use_label:
labels = torch.zeros(num_preds, dtype=torch.int64)
else:
labels = None
else:
import numpy as np
# Create an overlap for each predicted box
max_overlaps = torch.from_numpy(rng.rand(num_preds))
# Construct gt_inds for each predicted box
is_assigned = torch.from_numpy(rng.rand(num_preds) < p_assigned)
            # constrain the maximum number of assignments
n_assigned = min(num_preds, min(num_gts, is_assigned.sum()))
assigned_idxs = np.where(is_assigned)[0]
rng.shuffle(assigned_idxs)
assigned_idxs = assigned_idxs[0:n_assigned]
assigned_idxs.sort()
is_assigned[:] = 0
is_assigned[assigned_idxs] = True
is_ignore = torch.from_numpy(
rng.rand(num_preds) < p_ignore) & is_assigned
gt_inds = torch.zeros(num_preds, dtype=torch.int64)
true_idxs = np.arange(num_gts)
rng.shuffle(true_idxs)
true_idxs = torch.from_numpy(true_idxs)
gt_inds[is_assigned] = true_idxs[:n_assigned]
gt_inds = torch.from_numpy(
rng.randint(1, num_gts + 1, size=num_preds))
gt_inds[is_ignore] = -1
gt_inds[~is_assigned] = 0
max_overlaps[~is_assigned] = 0
            if p_use_label is True or rng.rand() < p_use_label:
if num_classes == 0:
labels = torch.zeros(num_preds, dtype=torch.int64)
else:
labels = torch.from_numpy(
rng.randint(1, num_classes + 1, size=num_preds))
labels[~is_assigned] = 0
else:
labels = None
self = cls(num_gts, gt_inds, max_overlaps, labels)
return self
def add_gt_(self, gt_labels):
self_inds = torch.arange(
1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
self.gt_inds = torch.cat([self_inds, self.gt_inds])
self.max_overlaps = torch.cat(
[self.max_overlaps.new_ones(len(gt_labels)), self.max_overlaps])
if self.labels is not None:
self.labels = torch.cat([gt_labels, self.labels])
import torch
from ..geometry import bbox_overlaps
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class ATSSAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
    Each proposal will be assigned `0` or a positive integer
indicating the ground truth index.
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
        topk (int): number of bboxes selected on each level
"""
def __init__(self, topk):
self.topk = topk
# https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py
def assign(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to bboxes.
        The assignment is done in the following steps:
        1. compute iou between all bboxes (bboxes of all pyramid levels)
           and gts
        2. compute the center distance between all bboxes and gts
        3. on each pyramid level, for each gt, select k bboxes whose centers
           are closest to the gt center, so we select k*l bboxes in total as
           candidates for each gt
        4. get the corresponding iou for these candidates, and compute the
           mean and std, set mean + std as the iou threshold
        5. select candidates whose iou is greater than or equal to
           the threshold as positive
        6. restrict each positive sample's center to lie inside its gt
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
INF = 100000000
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# assign 0 by default
assigned_gt_inds = overlaps.new_full((num_bboxes, ),
0,
dtype=torch.long)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = overlaps.new_zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = overlaps.new_zeros((num_bboxes, ),
dtype=torch.long)
return AssignResult(
num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = torch.stack((gt_cx, gt_cy), dim=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1)
distances = (bboxes_points[:, None, :] -
gt_points[None, :, :]).pow(2).sum(-1).sqrt()
# Selecting candidates based on the center distance
candidate_idxs = []
start_idx = 0
        for level, bboxes_per_level in enumerate(num_level_bboxes):
            # on each pyramid level, for each gt,
            # select k bboxes whose centers are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            # guard against levels that contain fewer than topk bboxes
            selectable_k = min(self.topk, bboxes_per_level)
            _, topk_idxs_per_level = distances_per_level.topk(
                selectable_k, dim=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
candidate_idxs = torch.cat(candidate_idxs, dim=0)
        # get the corresponding iou for these candidates, and compute the
        # mean and std; set mean + std as the iou threshold
candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps.mean(0)
overlaps_std_per_gt = candidate_overlaps.std(0)
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
        # limit the positive samples' centers to lie inside the gt
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
ep_bboxes_cx = bboxes_cx.view(1, -1).expand(
num_gt, num_bboxes).contiguous().view(-1)
ep_bboxes_cy = bboxes_cy.view(1, -1).expand(
num_gt, num_bboxes).contiguous().view(-1)
candidate_idxs = candidate_idxs.view(-1)
# calculate the left, top, right, bottom distance between positive
# bbox center and gt side
l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0]
t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt)
b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt)
is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01
is_pos = is_pos & is_in_gts
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = torch.full_like(overlaps,
-INF).t().contiguous().view(-1)
index = candidate_idxs.view(-1)[is_pos.view(-1)]
overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index]
overlaps_inf = overlaps_inf.view(num_gt, -1).t()
max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1)
assigned_gt_inds[
max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
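# Worked sketch of the adaptive threshold in steps 4-5 above (illustrative
# numbers only):
#
#   candidate_overlaps = torch.tensor([0.12, 0.30, 0.55, 0.48, 0.05])
#   thr = candidate_overlaps.mean() + candidate_overlaps.std()
#   # mean = 0.30, std (unbiased) ~= 0.218, so thr ~= 0.518; only the 0.55
#   # candidate passes step 5, and it still has to survive the
#   # center-inside-gt check of step 6.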
from abc import ABCMeta, abstractmethod
class BaseAssigner(metaclass=ABCMeta):
@abstractmethod
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
pass
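# Interface sketch: a concrete assigner only needs to implement `assign` and
# return an AssignResult. The all-background assigner below is hypothetical
# and shown for the contract only:
#
#   class AllBackgroundAssigner(BaseAssigner):
#       def assign(self, bboxes, gt_bboxes,
#                  gt_bboxes_ignore=None, gt_labels=None):
#           num_bboxes = bboxes.size(0)
#           gt_inds = bboxes.new_zeros((num_bboxes, ), dtype=torch.long)
#           max_overlaps = bboxes.new_zeros((num_bboxes, ))
#           return AssignResult(
#               gt_bboxes.size(0), gt_inds, max_overlaps, labels=None)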
import torch
from ..geometry import bbox_overlaps
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class MaxIoUAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
    Each proposal will be assigned `-1`, `0`, or a positive integer
    indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
        gpu_assign_thr (int): The upper bound of the number of GTs for GPU
            assignment. When the number of gts is above this threshold, the
            assignment is done on CPU. Negative values mean never assigning
            on CPU.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
ignore_wrt_candidates=True,
gpu_assign_thr=-1):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
self.gpu_assign_thr = gpu_assign_thr
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to bboxes.
        This method assigns a gt bbox to every bbox (proposal/anchor); each
        bbox will be assigned -1, 0, or a positive number. -1 means don't
        care, 0 means negative sample, and a positive number is the index
        (1-based) of the assigned gt.
        The assignment is done in the following steps, and the order matters.
        1. assign every bbox to -1
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        4. for each gt bbox, assign its nearest proposals (there may be more
           than one) to itself
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
Example:
>>> self = MaxIoUAssigner(0.5, 0.5)
>>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])
>>> gt_bboxes = torch.Tensor([[0, 0, 10, 9]])
>>> assign_result = self.assign(bboxes, gt_bboxes)
>>> expected_gt_inds = torch.LongTensor([1, 0])
>>> assert torch.all(assign_result.gt_inds == expected_gt_inds)
"""
        assign_on_cpu = (self.gpu_assign_thr > 0) and (
            gt_bboxes.shape[0] > self.gpu_assign_thr)
# compute overlap and assign gt on CPU when number of GT is large
if assign_on_cpu:
device = bboxes.device
bboxes = bboxes.cpu()
gt_bboxes = gt_bboxes.cpu()
if gt_bboxes_ignore is not None:
gt_bboxes_ignore = gt_bboxes_ignore.cpu()
if gt_labels is not None:
gt_labels = gt_labels.cpu()
bboxes = bboxes[:, :4]
overlaps = bbox_overlaps(gt_bboxes, bboxes)
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
if assign_on_cpu:
assign_result.gt_inds = assign_result.gt_inds.to(device)
assign_result.max_overlaps = assign_result.max_overlaps.to(device)
if assign_result.labels is not None:
assign_result.labels = assign_result.labels.to(device)
return assign_result
def assign_wrt_overlaps(self, overlaps, gt_labels=None):
"""Assign w.r.t. the overlaps of bboxes with gts.
Args:
overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
shape(k, n).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default
assigned_gt_inds = overlaps.new_full((num_bboxes, ),
-1,
dtype=torch.long)
if num_gts == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = overlaps.new_zeros((num_bboxes, ))
if num_gts == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = overlaps.new_zeros((num_bboxes, ),
dtype=torch.long)
return AssignResult(
num_gts,
assigned_gt_inds,
max_overlaps,
labels=assigned_labels)
# for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts
max_overlaps, argmax_overlaps = overlaps.max(dim=0)
# for each gt, which anchor best overlaps with it
# for each gt, the max iou of all proposals
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
        # 2. assign negative: below the negative IoU threshold
if isinstance(self.neg_iou_thr, float):
assigned_gt_inds[(max_overlaps >= 0)
& (max_overlaps < self.neg_iou_thr)] = 0
elif isinstance(self.neg_iou_thr, tuple):
assert len(self.neg_iou_thr) == 2
assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
& (max_overlaps < self.neg_iou_thr[1])] = 0
# 3. assign positive: above positive IoU threshold
pos_inds = max_overlaps >= self.pos_iou_thr
assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
# 4. assign fg: for each gt, proposals with highest IoU
for i in range(num_gts):
if gt_max_overlaps[i] >= self.min_pos_iou:
if self.gt_max_assign_all:
max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
assigned_gt_inds[max_iou_inds] = i + 1
else:
assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
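# Worked sketch of assign_wrt_overlaps (illustrative numbers only):
#
#   self = MaxIoUAssigner(pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.1)
#   # overlaps between 2 gts (rows) and 3 bboxes (columns)
#   overlaps = torch.Tensor([[0.6, 0.3, 0.0],
#                            [0.0, 0.1, 0.2]])
#   result = self.assign_wrt_overlaps(overlaps)
#   # bbox 0: iou 0.6 >= pos_iou_thr                 -> gt 1 (step 3)
#   # bbox 1: max iou 0.3 in [0, 0.5)                -> negative (step 2)
#   # bbox 2: best match of gt 2, 0.2 >= min_pos_iou -> rescued by step 4
#   assert result.gt_inds.tolist() == [1, 0, 2]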
import torch
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class PointAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each point.
    Each point will be assigned `0` or a positive integer
    indicating the ground truth index.
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt
    """
def __init__(self, scale=4, pos_num=3):
self.scale = scale
self.pos_num = pos_num
def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to points.
        This method assigns a gt bbox to every point; each point will be
        assigned 0 or a positive number. 0 means negative sample, and a
        positive number is the index (1-based) of the assigned gt.
        The assignment is done in the following steps, and the order matters.
        1. assign every point to 0
        2. a point is assigned to some gt bbox if
           (i) the point is within the k closest points to the gt bbox
           (ii) the distance between this point and the gt is smaller than
           the distance to any other gt bbox
Args:
            points (Tensor): points to be assigned, shape (n, 3), where the
                last dimension stands for (x, y, stride).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
NOTE: currently unused.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_points = points.shape[0]
num_gts = gt_bboxes.shape[0]
if num_gts == 0 or num_points == 0:
            # No ground truth or points, return empty assignment
assigned_gt_inds = points.new_full((num_points, ),
0,
dtype=torch.long)
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = points.new_zeros((num_points, ),
dtype=torch.long)
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
points_xy = points[:, :2]
points_stride = points[:, 2]
points_lvl = torch.log2(
points_stride).int() # [3...,4...,5...,6...,7...]
lvl_min, lvl_max = points_lvl.min(), points_lvl.max()
# assign gt box
gt_bboxes_xy = (gt_bboxes[:, :2] + gt_bboxes[:, 2:]) / 2
gt_bboxes_wh = (gt_bboxes[:, 2:] - gt_bboxes[:, :2]).clamp(min=1e-6)
scale = self.scale
gt_bboxes_lvl = ((torch.log2(gt_bboxes_wh[:, 0] / scale) +
torch.log2(gt_bboxes_wh[:, 1] / scale)) / 2).int()
gt_bboxes_lvl = torch.clamp(gt_bboxes_lvl, min=lvl_min, max=lvl_max)
# stores the assigned gt index of each point
assigned_gt_inds = points.new_zeros((num_points, ), dtype=torch.long)
# stores the assigned gt dist (to this point) of each point
assigned_gt_dist = points.new_full((num_points, ), float('inf'))
points_range = torch.arange(points.shape[0])
for idx in range(num_gts):
gt_lvl = gt_bboxes_lvl[idx]
# get the index of points in this level
lvl_idx = gt_lvl == points_lvl
points_index = points_range[lvl_idx]
# get the points in this level
lvl_points = points_xy[lvl_idx, :]
# get the center point of gt
gt_point = gt_bboxes_xy[[idx], :]
# get width and height of gt
gt_wh = gt_bboxes_wh[[idx], :]
# compute the distance between gt center and
# all points in this level
points_gt_dist = ((lvl_points - gt_point) / gt_wh).norm(dim=1)
            # find the nearest k points to the gt center on this level
            # (guard against levels that contain fewer than pos_num points)
            selectable_k = min(self.pos_num, len(points_gt_dist))
            min_dist, min_dist_index = torch.topk(
                points_gt_dist, selectable_k, largest=False)
# the index of nearest k points to gt center in this level
min_dist_points_index = points_index[min_dist_index]
            # The less_than_recorded_index stores the indices of min_dist
            # that are less than the assigned_gt_dist, where assigned_gt_dist
            # stores the dist from the previously assigned gt (if it exists)
            # to each point.
less_than_recorded_index = min_dist < assigned_gt_dist[
min_dist_points_index]
# The min_dist_points_index stores the index of points satisfy:
# (1) it is k nearest to current gt center in this level.
# (2) it is closer to current gt center than other gt center.
min_dist_points_index = min_dist_points_index[
less_than_recorded_index]
# assign the result
assigned_gt_inds[min_dist_points_index] = idx + 1
assigned_gt_dist[min_dist_points_index] = min_dist[
less_than_recorded_index]
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_points, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
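# Usage sketch for PointAssigner (illustrative numbers only):
#
#   self = PointAssigner(scale=4, pos_num=1)
#   # points are (x, y, stride); strides 8 and 16 map to levels 3 and 4
#   points = torch.Tensor([[4., 4., 8.], [12., 12., 8.], [16., 16., 16.]])
#   # a 32x32 gt maps to level (log2(32/4) + log2(32/4)) / 2 = 3
#   gt_bboxes = torch.Tensor([[0., 0., 32., 32.]])
#   result = self.assign(points, gt_bboxes)
#   # only the two level-3 points compete; the one nearest the gt center,
#   # (12, 12), wins
#   assert result.gt_inds.tolist() == [0, 1, 0]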